Path: blob/main/crates/polars-expr/src/hot_groups/binview.rs
6940 views
use arrow::array::builder::StaticArrayBuilder;1use arrow::array::{BinaryViewArrayGenericBuilder, PrimitiveArray, View};2use arrow::bitmap::MutableBitmap;3use arrow::buffer::Buffer;4use polars_utils::vec::PushUnchecked;56use super::*;7use crate::hash_keys::BinviewKeys;8use crate::hot_groups::fixed_index_table::FixedIndexTable;910pub struct BinviewHashHotGrouper {11// The views in this table when not inline are stored in the vec.12table: FixedIndexTable<(u64, View, Vec<u8>)>,13evicted_key_hashes: Vec<u64>,14evicted_keys: BinaryViewArrayGenericBuilder<[u8]>,15null_idx: IdxSize,16}1718impl BinviewHashHotGrouper {19pub fn new(max_groups: usize) -> Self {20Self {21table: FixedIndexTable::new(max_groups.try_into().unwrap()),22evicted_key_hashes: Vec::new(),23evicted_keys: BinaryViewArrayGenericBuilder::new(ArrowDataType::BinaryView),24null_idx: IdxSize::MAX,25}26}2728/// # Safety29/// The view must be valid for the given buffer set.30#[inline(always)]31unsafe fn insert_key(32&mut self,33hash: u64,34view: View,35force_hot: bool,36buffers: &Arc<[Buffer<u8>]>,37) -> Option<EvictIdx> {38unsafe {39let mut evict = |ev_h: &u64, ev_view: &View, ev_buffer: &Vec<u8>| {40self.evicted_key_hashes.push(*ev_h);41if ev_view.is_inline() {42self.evicted_keys.push_inline_view_ignore_validity(*ev_view);43} else {44self.evicted_keys45.push_value_ignore_validity(ev_buffer.as_slice());46}47};48if view.is_inline() {49self.table.insert_key(50hash,51(),52force_hot,53|_, b| view == b.1,54|_| (hash, view, Vec::new()),55|_, ev_k| {56let (ev_h, ev_view, ev_buffer) = ev_k;57evict(ev_h, ev_view, ev_buffer);58*ev_h = hash;59*ev_view = view;60ev_buffer.clear();61},62)63} else {64let bytes = view.get_external_slice_unchecked(buffers);65self.table.insert_key(66hash,67(),68force_hot,69|_, b| {70// We only reach here if the hash matched, so jump straight to full comparison.71bytes == b.272},73|_| (hash, view, bytes.to_vec()),74|_, ev_k| {75let (ev_h, ev_view, ev_buffer) = ev_k;76evict(ev_h, ev_view, ev_buffer);77*ev_h = hash;78*ev_view = view;79ev_buffer.clear();80ev_buffer.extend_from_slice(bytes);81},82)83}84}85}8687#[inline(always)]88fn insert_null(&mut self) -> Option<EvictIdx> {89if self.null_idx == IdxSize::MAX {90self.null_idx = self91.table92.push_unmapped_key((0, View::default(), Vec::new()));93}94Some(EvictIdx::new(self.null_idx, false))95}96}9798impl HotGrouper for BinviewHashHotGrouper {99fn new_empty(&self, max_groups: usize) -> Box<dyn HotGrouper> {100Box::new(Self::new(max_groups))101}102103fn num_groups(&self) -> IdxSize {104self.table.len() as IdxSize105}106107fn insert_keys(108&mut self,109hash_keys: &HashKeys,110hot_idxs: &mut Vec<IdxSize>,111hot_group_idxs: &mut Vec<EvictIdx>,112cold_idxs: &mut Vec<IdxSize>,113force_hot: bool,114) {115let HashKeys::Binview(hash_keys) = hash_keys else {116unreachable!()117};118119hot_idxs.reserve(hash_keys.keys.len());120hot_group_idxs.reserve(hash_keys.keys.len());121cold_idxs.reserve(hash_keys.keys.len());122123let mut push_g = |idx: usize, opt_g: Option<EvictIdx>| unsafe {124if let Some(g) = opt_g {125hot_idxs.push_unchecked(idx as IdxSize);126hot_group_idxs.push_unchecked(g);127} else {128cold_idxs.push_unchecked(idx as IdxSize);129}130};131132unsafe {133let views = hash_keys.keys.views().as_slice();134let buffers = hash_keys.keys.data_buffers();135if hash_keys.null_is_valid {136hash_keys.for_each_hash(|idx, opt_h| {137if let Some(h) = opt_h {138let view = views.get_unchecked(idx as usize);139push_g(idx as usize, self.insert_key(h, *view, force_hot, buffers));140} else {141push_g(idx as usize, self.insert_null());142}143});144} else {145hash_keys.for_each_hash(|idx, opt_h| {146if let Some(h) = opt_h {147let view = views.get_unchecked(idx as usize);148push_g(idx as usize, self.insert_key(h, *view, force_hot, buffers));149}150});151}152}153}154155fn keys(&self) -> HashKeys {156unsafe {157let mut hashes = Vec::with_capacity(self.table.len());158let mut keys_builder = BinaryViewArrayGenericBuilder::new(ArrowDataType::BinaryView);159keys_builder.reserve(self.table.len());160for (h, view, buf) in self.table.keys() {161hashes.push_unchecked(*h);162if view.is_inline() {163keys_builder.push_inline_view_ignore_validity(*view);164} else {165keys_builder.push_value_ignore_validity(buf.as_slice());166}167}168169let hashes = PrimitiveArray::from_vec(hashes);170let mut keys = keys_builder.freeze();171let null_is_valid = self.null_idx < IdxSize::MAX;172if null_is_valid {173let mut validity = MutableBitmap::new();174validity.extend_constant(keys.len(), true);175validity.set(self.null_idx as usize, false);176keys = keys.with_validity_typed(Some(validity.freeze()));177}178HashKeys::Binview(BinviewKeys {179hashes,180keys,181null_is_valid,182})183}184}185186fn num_evictions(&self) -> usize {187self.evicted_keys.len()188}189190fn take_evicted_keys(&mut self) -> HashKeys {191let hashes = core::mem::take(&mut self.evicted_key_hashes);192let keys = self.evicted_keys.freeze_reset();193HashKeys::Binview(BinviewKeys {194hashes: PrimitiveArray::from_vec(hashes),195keys,196null_is_valid: false,197})198}199200fn as_any(&self) -> &dyn Any {201self202}203}204205206