Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-expr/src/hot_groups/fixed_index_table.rs
6940 views
1
use polars_utils::IdxSize;
2
use polars_utils::select::select_unpredictable;
3
use polars_utils::vec::PushUnchecked;
4
5
use crate::EvictIdx;
6
7
const H2_MULT: u64 = 0xf1357aea2e62a9c5;
8
9
#[derive(Clone)]
10
struct Slot {
11
tag: u32,
12
last_access_tag: u32,
13
key_index: IdxSize,
14
}
15
16
/// A fixed-size hash table which maps keys to indices.
17
///
18
/// Instead of growing indefinitely this table will evict keys instead.
19
pub struct FixedIndexTable<K> {
20
slots: Vec<Slot>,
21
keys: Vec<K>,
22
num_filled_slots: usize, // Possibly different than keys.len() because of push_unmapped_key.
23
shift: u8,
24
prng: u64,
25
}
26
27
impl<K> FixedIndexTable<K> {
28
pub fn new(num_slots: IdxSize) -> Self {
29
assert!(num_slots.is_power_of_two());
30
assert!(num_slots > 1);
31
let empty_slot = Slot {
32
tag: u32::MAX,
33
last_access_tag: u32::MAX,
34
key_index: IdxSize::MAX,
35
};
36
Self {
37
slots: vec![empty_slot; num_slots as usize],
38
shift: 64 - num_slots.trailing_zeros() as u8,
39
num_filled_slots: 0,
40
// We add one to the capacity for the null key.
41
keys: Vec::with_capacity(1 + num_slots as usize),
42
prng: 0,
43
}
44
}
45
46
pub fn len(&self) -> usize {
47
self.keys.len()
48
}
49
50
/// Insert a key which will never be mapped to nor evicted.
51
///
52
/// This is useful for permanent entries which are handled externally.
53
/// Returns the key index this would have taken up.
54
pub fn push_unmapped_key(&mut self, key: K) -> IdxSize {
55
let idx = self.keys.len();
56
self.keys.push(key);
57
idx as IdxSize
58
}
59
60
/// Tries to insert a key with a given hash.
61
///
62
/// Returns Some((index, evict_old)) if successful, None otherwise.
63
pub fn insert_key<Q, E, I, V>(
64
&mut self,
65
hash: u64,
66
key: Q,
67
force_insert: bool,
68
mut eq: E,
69
mut insert: I,
70
mut evict_insert: V,
71
) -> Option<EvictIdx>
72
where
73
E: FnMut(&Q, &K) -> bool,
74
I: FnMut(Q) -> K,
75
V: FnMut(Q, &mut K),
76
{
77
let tag = hash as u32;
78
let h1 = (hash >> self.shift) as usize;
79
let h2 = (hash.wrapping_mul(H2_MULT) >> self.shift) as usize;
80
81
unsafe {
82
// We only want a single branch for the hot hit/miss check. This is
83
// why we check both slots at once.
84
let s1 = self.slots.get_unchecked(h1);
85
let s2 = self.slots.get_unchecked(h2);
86
let s1_delta = s1.tag ^ tag;
87
let s2_delta = s2.tag ^ tag;
88
// This check can have false positives (the binary AND of the deltas
89
// happens to be zero by accident), but this is very unlikely (~1/10k)
90
// and harmless if it does. False negatives are impossible. If this
91
// branch succeeds we almost surely have a hit, if it fails
92
// we're certain we have a miss.
93
if s1_delta & s2_delta == 0 {
94
// We want to branchlessly select the most likely candidate
95
// first to ensure no further branch mispredicts in the vast
96
// majority of cases.
97
let ha = select_unpredictable(s1_delta == 0, h1, h2);
98
let sa = self.slots.get_unchecked_mut(ha);
99
if let Some(sak) = self.keys.get(sa.key_index as usize) {
100
if eq(&key, sak) {
101
sa.last_access_tag = tag;
102
return Some(EvictIdx::new(sa.key_index, false));
103
}
104
}
105
106
// If both hashes matched we have to check the second slot too.
107
if s1_delta == s2_delta {
108
let hb = h1 ^ h2 ^ ha;
109
let sb = self.slots.get_unchecked_mut(hb);
110
if let Some(sbk) = self.keys.get(sb.key_index as usize) {
111
if eq(&key, sbk) {
112
sb.last_access_tag = tag;
113
return Some(EvictIdx::new(sb.key_index, false));
114
}
115
}
116
}
117
}
118
119
// Check if we can insert into an empty slot.
120
let num_keys = self.keys.len() as IdxSize;
121
if self.num_filled_slots < self.slots.len() {
122
// Check the first slot.
123
let s1 = self.slots.get_unchecked_mut(h1);
124
if s1.key_index >= num_keys {
125
s1.tag = tag;
126
s1.last_access_tag = tag;
127
s1.key_index = num_keys;
128
self.keys.push_unchecked(insert(key));
129
self.num_filled_slots += 1;
130
return Some(EvictIdx::new(s1.key_index, false));
131
}
132
133
// Check the second slot.
134
let s2 = self.slots.get_unchecked_mut(h2);
135
if s2.key_index >= num_keys {
136
s2.tag = tag;
137
s2.last_access_tag = tag;
138
s2.key_index = num_keys;
139
self.keys.push_unchecked(insert(key));
140
self.num_filled_slots += 1;
141
return Some(EvictIdx::new(s2.key_index, false));
142
}
143
}
144
145
// Randomly try to evict one of the two slots.
146
let hr = select_unpredictable(self.prng >> 63 != 0, h1, h2);
147
self.prng = self.prng.wrapping_add(hash);
148
let slot = self.slots.get_unchecked_mut(hr);
149
150
if (slot.last_access_tag == tag) | force_insert {
151
slot.tag = tag;
152
let evict_key = self.keys.get_unchecked_mut(slot.key_index as usize);
153
evict_insert(key, evict_key);
154
Some(EvictIdx::new(slot.key_index, true))
155
} else {
156
slot.last_access_tag = tag;
157
None
158
}
159
}
160
}
161
162
pub fn keys(&self) -> &[K] {
163
&self.keys
164
}
165
}
166
167