Path: blob/main/crates/polars-core/src/chunked_array/binary.rs
6940 views
use std::hash::BuildHasher;12use polars_utils::aliases::PlRandomState;3use polars_utils::hashing::BytesHash;4use rayon::prelude::*;56use crate::POOL;7use crate::prelude::*;8use crate::utils::{_set_partition_size, _split_offsets};910#[inline]11fn fill_bytes_hashes<'a, T>(12ca: &'a ChunkedArray<T>,13null_h: u64,14hb: PlRandomState,15) -> Vec<BytesHash<'a>>16where17T: PolarsDataType,18<<T as PolarsDataType>::Array as StaticArray>::ValueT<'a>: AsRef<[u8]>,19{20let mut byte_hashes = Vec::with_capacity(ca.len());21for arr in ca.downcast_iter() {22for opt_b in arr.iter() {23let opt_b = opt_b.as_ref().map(|v| v.as_ref());24// SAFETY:25// the underlying data is tied to self26let opt_b = unsafe { std::mem::transmute::<Option<&[u8]>, Option<&'a [u8]>>(opt_b) };27let hash = match opt_b {28Some(s) => hb.hash_one(s),29None => null_h,30};31byte_hashes.push(BytesHash::new(opt_b, hash))32}33}34byte_hashes35}3637impl<T> ChunkedArray<T>38where39T: PolarsDataType,40for<'a> <T::Array as StaticArray>::ValueT<'a>: AsRef<[u8]>,41{42#[allow(clippy::needless_lifetimes)]43pub fn to_bytes_hashes<'a>(44&'a self,45mut multithreaded: bool,46hb: PlRandomState,47) -> Vec<Vec<BytesHash<'a>>> {48multithreaded &= POOL.current_num_threads() > 1;49let null_h = hb.hash_one(0xde259df92c607d49_u64);5051if multithreaded {52let n_partitions = _set_partition_size();5354let split = _split_offsets(self.len(), n_partitions);5556POOL.install(|| {57split58.into_par_iter()59.map(|(offset, len)| {60let ca = self.slice(offset as i64, len);61let byte_hashes = fill_bytes_hashes(&ca, null_h, hb);6263// SAFETY:64// the underlying data is tied to self65unsafe {66std::mem::transmute::<Vec<BytesHash<'_>>, Vec<BytesHash<'a>>>(67byte_hashes,68)69}70})71.collect::<Vec<_>>()72})73} else {74vec![fill_bytes_hashes(self, null_h, hb)]75}76}77}787980