Path: blob/main/crates/polars-compute/src/propagate_dictionary.rs
6939 views
use arrow::array::{Array, BinaryViewArray, PrimitiveArray, Utf8ViewArray};1use arrow::bitmap::Bitmap;2use arrow::datatypes::ArrowDataType::UInt32;34/// Propagate the nulls from the dictionary values into the keys and remove those nulls from the5/// values.6pub fn propagate_dictionary_value_nulls(7keys: &PrimitiveArray<u32>,8values: &Utf8ViewArray,9) -> (PrimitiveArray<u32>, Utf8ViewArray) {10let Some(values_validity) = values.validity() else {11return (keys.clone(), values.clone().with_validity(None));12};13if values_validity.unset_bits() == 0 {14return (keys.clone(), values.clone().with_validity(None));15}1617let num_values = values.len();1819// Create a map from the old indices to indices with nulls filtered out20let mut offset = 0;21let new_idx_map: Vec<u32> = (0..num_values)22.map(|i| {23let is_valid = unsafe { values_validity.get_bit_unchecked(i) };24offset += usize::from(!is_valid);25if is_valid { (i - offset) as u32 } else { 0 }26})27.collect();2829let keys = match keys.validity() {30None => {31let values = keys32.values()33.iter()34.map(|&k| unsafe {35// SAFETY: Arrow invariant that all keys are in range of values36*new_idx_map.get_unchecked(k as usize)37})38.collect();39let validity = Bitmap::from_iter(keys.values().iter().map(|&k| unsafe {40// SAFETY: Arrow invariant that all keys are in range of values41values_validity.get_bit_unchecked(k as usize)42}));4344PrimitiveArray::new(UInt32, values, Some(validity))45},46Some(keys_validity) => {47let values = keys48.values()49.iter()50.map(|&k| {51// deal with nulls in keys52let idx = (k as usize).min(num_values);53// SAFETY: Arrow invariant that all keys are in range of values54*unsafe { new_idx_map.get_unchecked(idx) }55})56.collect();57let propagated_validity = Bitmap::from_iter(keys.values().iter().map(|&k| {58// deal with nulls in keys59let idx = (k as usize).min(num_values);60// SAFETY: Arrow invariant that all keys are in range of values61unsafe { values_validity.get_bit_unchecked(idx) }62}));6364let validity = &propagated_validity & keys_validity;65PrimitiveArray::new(UInt32, values, Some(validity))66},67};6869// Filter only handles binary70let values = values.to_binview();7172// Filter out the null values73let values = crate::filter::filter_with_bitmap(&values, values_validity);74let values = values.as_any().downcast_ref::<BinaryViewArray>().unwrap();75let values = unsafe { values.to_utf8view_unchecked() };7677// Explicitly set the values validity to none.78assert_eq!(values.null_count(), 0);79let values = values.with_validity(None);8081(keys, values)82}838485