Path: blob/main/crates/polars-expr/src/hot_groups/fixed_index_table.rs
6940 views
use polars_utils::IdxSize;1use polars_utils::select::select_unpredictable;2use polars_utils::vec::PushUnchecked;34use crate::EvictIdx;56const H2_MULT: u64 = 0xf1357aea2e62a9c5;78#[derive(Clone)]9struct Slot {10tag: u32,11last_access_tag: u32,12key_index: IdxSize,13}1415/// A fixed-size hash table which maps keys to indices.16///17/// Instead of growing indefinitely this table will evict keys instead.18pub struct FixedIndexTable<K> {19slots: Vec<Slot>,20keys: Vec<K>,21num_filled_slots: usize, // Possibly different than keys.len() because of push_unmapped_key.22shift: u8,23prng: u64,24}2526impl<K> FixedIndexTable<K> {27pub fn new(num_slots: IdxSize) -> Self {28assert!(num_slots.is_power_of_two());29assert!(num_slots > 1);30let empty_slot = Slot {31tag: u32::MAX,32last_access_tag: u32::MAX,33key_index: IdxSize::MAX,34};35Self {36slots: vec![empty_slot; num_slots as usize],37shift: 64 - num_slots.trailing_zeros() as u8,38num_filled_slots: 0,39// We add one to the capacity for the null key.40keys: Vec::with_capacity(1 + num_slots as usize),41prng: 0,42}43}4445pub fn len(&self) -> usize {46self.keys.len()47}4849/// Insert a key which will never be mapped to nor evicted.50///51/// This is useful for permanent entries which are handled externally.52/// Returns the key index this would have taken up.53pub fn push_unmapped_key(&mut self, key: K) -> IdxSize {54let idx = self.keys.len();55self.keys.push(key);56idx as IdxSize57}5859/// Tries to insert a key with a given hash.60///61/// Returns Some((index, evict_old)) if successful, None otherwise.62pub fn insert_key<Q, E, I, V>(63&mut self,64hash: u64,65key: Q,66force_insert: bool,67mut eq: E,68mut insert: I,69mut evict_insert: V,70) -> Option<EvictIdx>71where72E: FnMut(&Q, &K) -> bool,73I: FnMut(Q) -> K,74V: FnMut(Q, &mut K),75{76let tag = hash as u32;77let h1 = (hash >> self.shift) as usize;78let h2 = (hash.wrapping_mul(H2_MULT) >> self.shift) as usize;7980unsafe {81// We only want a single branch for the hot hit/miss check. This is82// why we check both slots at once.83let s1 = self.slots.get_unchecked(h1);84let s2 = self.slots.get_unchecked(h2);85let s1_delta = s1.tag ^ tag;86let s2_delta = s2.tag ^ tag;87// This check can have false positives (the binary AND of the deltas88// happens to be zero by accident), but this is very unlikely (~1/10k)89// and harmless if it does. False negatives are impossible. If this90// branch succeeds we almost surely have a hit, if it fails91// we're certain we have a miss.92if s1_delta & s2_delta == 0 {93// We want to branchlessly select the most likely candidate94// first to ensure no further branch mispredicts in the vast95// majority of cases.96let ha = select_unpredictable(s1_delta == 0, h1, h2);97let sa = self.slots.get_unchecked_mut(ha);98if let Some(sak) = self.keys.get(sa.key_index as usize) {99if eq(&key, sak) {100sa.last_access_tag = tag;101return Some(EvictIdx::new(sa.key_index, false));102}103}104105// If both hashes matched we have to check the second slot too.106if s1_delta == s2_delta {107let hb = h1 ^ h2 ^ ha;108let sb = self.slots.get_unchecked_mut(hb);109if let Some(sbk) = self.keys.get(sb.key_index as usize) {110if eq(&key, sbk) {111sb.last_access_tag = tag;112return Some(EvictIdx::new(sb.key_index, false));113}114}115}116}117118// Check if we can insert into an empty slot.119let num_keys = self.keys.len() as IdxSize;120if self.num_filled_slots < self.slots.len() {121// Check the first slot.122let s1 = self.slots.get_unchecked_mut(h1);123if s1.key_index >= num_keys {124s1.tag = tag;125s1.last_access_tag = tag;126s1.key_index = num_keys;127self.keys.push_unchecked(insert(key));128self.num_filled_slots += 1;129return Some(EvictIdx::new(s1.key_index, false));130}131132// Check the second slot.133let s2 = self.slots.get_unchecked_mut(h2);134if s2.key_index >= num_keys {135s2.tag = tag;136s2.last_access_tag = tag;137s2.key_index = num_keys;138self.keys.push_unchecked(insert(key));139self.num_filled_slots += 1;140return Some(EvictIdx::new(s2.key_index, false));141}142}143144// Randomly try to evict one of the two slots.145let hr = select_unpredictable(self.prng >> 63 != 0, h1, h2);146self.prng = self.prng.wrapping_add(hash);147let slot = self.slots.get_unchecked_mut(hr);148149if (slot.last_access_tag == tag) | force_insert {150slot.tag = tag;151let evict_key = self.keys.get_unchecked_mut(slot.key_index as usize);152evict_insert(key, evict_key);153Some(EvictIdx::new(slot.key_index, true))154} else {155slot.last_access_tag = tag;156None157}158}159}160161pub fn keys(&self) -> &[K] {162&self.keys163}164}165166167