Path: blob/main/crates/polars-compute/src/gather/primitive.rs
6939 views
use arrow::array::PrimitiveArray;1use arrow::bitmap::utils::set_bit_unchecked;2use arrow::bitmap::{Bitmap, MutableBitmap};3use arrow::legacy::index::IdxArr;4use arrow::legacy::utils::CustomIterTools;5use arrow::types::NativeType;6use polars_utils::index::NullCount;78pub(super) unsafe fn take_values_and_validity_unchecked<T: NativeType>(9values: &[T],10validity_values: Option<&Bitmap>,11indices: &IdxArr,12) -> (Vec<T>, Option<Bitmap>) {13let index_values = indices.values().as_slice();1415let null_count = validity_values.map(|b| b.unset_bits()).unwrap_or(0);1617// first take the values, these are always needed18let values: Vec<T> = if indices.null_count() == 0 {19index_values20.iter()21.map(|idx| *values.get_unchecked(*idx as usize))22.collect_trusted()23} else {24indices25.iter()26.map(|idx| match idx {27Some(idx) => *values.get_unchecked(*idx as usize),28None => T::default(),29})30.collect_trusted()31};3233if null_count > 0 {34let validity_values = validity_values.unwrap();35// the validity buffer we will fill with all valid. And we unset the ones that are null36// in later checks37// this is in the assumption that most values will be valid.38// Maybe we could add another branch based on the null count39let mut validity = MutableBitmap::with_capacity(indices.len());40validity.extend_constant(indices.len(), true);41let validity_slice = validity.as_mut_slice();4243if let Some(validity_indices) = indices.validity().as_ref() {44index_values.iter().enumerate().for_each(|(i, idx)| {45// i is iteration count46// idx is the index that we take from the values array.47let idx = *idx as usize;48if !validity_indices.get_bit_unchecked(i) || !validity_values.get_bit_unchecked(idx)49{50set_bit_unchecked(validity_slice, i, false);51}52});53} else {54index_values.iter().enumerate().for_each(|(i, idx)| {55let idx = *idx as usize;56if !validity_values.get_bit_unchecked(idx) {57set_bit_unchecked(validity_slice, i, false);58}59});60};61(values, Some(validity.freeze()))62} else {63(values, indices.validity().cloned())64}65}6667/// Take kernel for single chunk with nulls and arrow array as index that may have nulls.68/// # Safety69/// caller must ensure indices are in bounds70pub unsafe fn take_primitive_unchecked<T: NativeType>(71arr: &PrimitiveArray<T>,72indices: &IdxArr,73) -> PrimitiveArray<T> {74let (values, validity) =75take_values_and_validity_unchecked(arr.values(), arr.validity(), indices);76PrimitiveArray::new_unchecked(arr.dtype().clone(), values.into(), validity)77}787980