Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/propagate_dictionary.rs
6939 views
1
use arrow::array::{Array, BinaryViewArray, PrimitiveArray, Utf8ViewArray};
2
use arrow::bitmap::Bitmap;
3
use arrow::datatypes::ArrowDataType::UInt32;
4
5
/// Propagate the nulls from the dictionary values into the keys and remove those nulls from the
6
/// values.
7
pub fn propagate_dictionary_value_nulls(
8
keys: &PrimitiveArray<u32>,
9
values: &Utf8ViewArray,
10
) -> (PrimitiveArray<u32>, Utf8ViewArray) {
11
let Some(values_validity) = values.validity() else {
12
return (keys.clone(), values.clone().with_validity(None));
13
};
14
if values_validity.unset_bits() == 0 {
15
return (keys.clone(), values.clone().with_validity(None));
16
}
17
18
let num_values = values.len();
19
20
// Create a map from the old indices to indices with nulls filtered out
21
let mut offset = 0;
22
let new_idx_map: Vec<u32> = (0..num_values)
23
.map(|i| {
24
let is_valid = unsafe { values_validity.get_bit_unchecked(i) };
25
offset += usize::from(!is_valid);
26
if is_valid { (i - offset) as u32 } else { 0 }
27
})
28
.collect();
29
30
let keys = match keys.validity() {
31
None => {
32
let values = keys
33
.values()
34
.iter()
35
.map(|&k| unsafe {
36
// SAFETY: Arrow invariant that all keys are in range of values
37
*new_idx_map.get_unchecked(k as usize)
38
})
39
.collect();
40
let validity = Bitmap::from_iter(keys.values().iter().map(|&k| unsafe {
41
// SAFETY: Arrow invariant that all keys are in range of values
42
values_validity.get_bit_unchecked(k as usize)
43
}));
44
45
PrimitiveArray::new(UInt32, values, Some(validity))
46
},
47
Some(keys_validity) => {
48
let values = keys
49
.values()
50
.iter()
51
.map(|&k| {
52
// deal with nulls in keys
53
let idx = (k as usize).min(num_values);
54
// SAFETY: Arrow invariant that all keys are in range of values
55
*unsafe { new_idx_map.get_unchecked(idx) }
56
})
57
.collect();
58
let propagated_validity = Bitmap::from_iter(keys.values().iter().map(|&k| {
59
// deal with nulls in keys
60
let idx = (k as usize).min(num_values);
61
// SAFETY: Arrow invariant that all keys are in range of values
62
unsafe { values_validity.get_bit_unchecked(idx) }
63
}));
64
65
let validity = &propagated_validity & keys_validity;
66
PrimitiveArray::new(UInt32, values, Some(validity))
67
},
68
};
69
70
// Filter only handles binary
71
let values = values.to_binview();
72
73
// Filter out the null values
74
let values = crate::filter::filter_with_bitmap(&values, values_validity);
75
let values = values.as_any().downcast_ref::<BinaryViewArray>().unwrap();
76
let values = unsafe { values.to_utf8view_unchecked() };
77
78
// Explicitly set the values validity to none.
79
assert_eq!(values.null_count(), 0);
80
let values = values.with_validity(None);
81
82
(keys, values)
83
}
84
85