Path: blob/main/crates/polars-ops/src/chunked_array/list/hash.rs
6939 views
use std::hash::{BuildHasher, Hash};12use polars_core::series::BitRepr;3use polars_core::utils::NoNull;4use polars_core::{POOL, with_match_physical_float_polars_type};5use polars_utils::aliases::PlSeedableRandomStateQuality;6use polars_utils::hashing::_boost_hash_combine;7use polars_utils::total_ord::{ToTotalOrd, TotalHash};8use rayon::prelude::*;910use super::*;1112fn hash_agg<T>(ca: &ChunkedArray<T>, random_state: &PlSeedableRandomStateQuality) -> u6413where14T: PolarsNumericType,15T::Native: TotalHash + ToTotalOrd,16<T::Native as ToTotalOrd>::TotalOrdItem: Hash,17{18// Note that we don't use the no null branch! This can break in unexpected ways.19// for instance with threading we split an array in n_threads, this may lead to20// splits that have no nulls and splits that have nulls. Then one array is hashed with21// Option<T> and the other array with T.22// Meaning that they cannot be compared. By always hashing on Option<T> the random_state is23// the only deterministic seed.2425// just some large prime26let mut hash_agg = 9069731903u64;2728// just some large prime29let null_hash = 2413670057;3031ca.downcast_iter().for_each(|arr| {32for opt_v in arr.iter() {33match opt_v {34Some(v) => {35let r = random_state.hash_one(v.to_total_ord());36hash_agg = _boost_hash_combine(hash_agg, r);37},38None => {39hash_agg = _boost_hash_combine(hash_agg, null_hash);40},41}42}43});44hash_agg45}4647pub(crate) fn hash(48ca: &mut ListChunked,49build_hasher: PlSeedableRandomStateQuality,50) -> UInt64Chunked {51if !ca.inner_dtype().to_physical().is_primitive_numeric() {52panic!(53"Hashing a list with a non-numeric inner type not supported. Got dtype: {:?}",54ca.dtype()55);56}5758// just some large prime59let null_hash = 1969099309u64;6061ca.set_inner_dtype(ca.inner_dtype().to_physical());6263let out: NoNull<UInt64Chunked> = POOL.install(|| {64ca.par_iter()65.map(|opt_s: Option<Series>| match opt_s {66None => null_hash,67Some(s) => {68if s.dtype().is_float() {69with_match_physical_float_polars_type!(s.dtype(), |$T| {70let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();71hash_agg(ca, &build_hasher)72})73} else {74match s.bit_repr() {75None => unimplemented!("Hash for lists without bit representation"),76Some(BitRepr::U8(ca)) => hash_agg(&ca, &build_hasher),77Some(BitRepr::U16(ca)) => hash_agg(&ca, &build_hasher),78Some(BitRepr::U32(ca)) => hash_agg(&ca, &build_hasher),79Some(BitRepr::U64(ca)) => hash_agg(&ca, &build_hasher),80#[cfg(feature = "dtype-i128")]81Some(BitRepr::I128(ca)) => hash_agg(&ca, &build_hasher),82}83}84},85})86.collect()87});8889let mut out = out.into_inner();90out.rename(ca.name().clone());91out92}939495