Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/nan.rs
8424 views
1
#![allow(clippy::eq_op)] // We use x != x to detect NaN generically.
2
3
use arrow::bitmap::Bitmap;
4
use polars_buffer::SharedStorage;
5
use polars_utils::float::IsFloat;
6
7
/// Returns `true` if at least one of the 64 elements compares unequal to
/// itself.
///
/// For floating-point element types `x != x` holds exactly when `x` is NaN,
/// so this answers "does the chunk contain a NaN?" without requiring a
/// float-specific trait bound.
fn chunk_has_nan<T: PartialEq>(arr: &[T; 64]) -> bool {
    // This has some hackery to improve autovectorization: the two halves of
    // the chunk are tested in the same iteration and merged with the
    // non-short-circuiting `|`, so the loop body contains no early exit.
    let mut has_nan = false;
    for i in 0..32 {
        has_nan |= (arr[i] != arr[i]) | (arr[i + 32] != arr[i + 32]);
    }
    has_nan
}
15
16
/// Returns a 64-bit mask in which bit `i` is set iff `arr[i] != arr[i]`,
/// i.e. iff `arr[i]` is NaN for floating-point element types.
///
/// Bit 0 corresponds to `arr[0]`, so `trailing_zeros()` of the result is the
/// index of the first NaN in the chunk (or 64 when there is none).
fn chunk_nan_mask<T: PartialEq>(arr: &[T; 64]) -> u64 {
    let mut mask = 0;
    for (i, v) in arr.iter().enumerate() {
        mask |= ((v != v) as u64) << i;
    }
    mask
}
23
24
/// Returns the first i for which slice[i].is_nan() is true, if any.
25
pub fn first_nan_idx<T: PartialEq + IsFloat>(slice: &[T]) -> Option<usize> {
26
assert!(T::is_float());
27
let mut offset = 0;
28
let (chunks, last_chunk) = slice.as_chunks::<64>();
29
for chunk in chunks {
30
if chunk_has_nan(chunk) {
31
let offset_in_chunk = chunk_nan_mask(chunk).trailing_zeros() as usize;
32
return Some(offset + offset_in_chunk);
33
}
34
offset += 64;
35
}
36
last_chunk.iter().position(|x| x != x).map(|i| offset + i)
37
}
38
39
/// Returns a bitmap, where bitmap[i] = slice[i].is_nan(). If None is returned
40
/// none of the elements are NaN.
41
pub fn is_nan<T: PartialEq + IsFloat>(slice: &[T]) -> Option<Bitmap> {
42
is_not_nan_impl(slice, true)
43
}
44
45
/// Returns a bitmap, where bitmap[i] = !slice[i].is_nan(). If None is returned
46
/// none of the elements are NaN.
47
pub fn is_not_nan<T: PartialEq + IsFloat>(slice: &[T]) -> Option<Bitmap> {
48
is_not_nan_impl(slice, false)
49
}
50
51
fn is_not_nan_impl<T: PartialEq + IsFloat>(slice: &[T], invert: bool) -> Option<Bitmap> {
52
assert!(T::is_float());
53
let invert_mask = if invert { u64::MAX } else { 0 };
54
let first_idx = first_nan_idx(slice)?;
55
let no_nan_chunks = first_idx / 64;
56
let mut words = Vec::with_capacity(slice.len().div_ceil(64));
57
let mut unset_bits = 0;
58
words.resize(no_nan_chunks, u64::MAX ^ invert_mask);
59
60
let (chunks, last_chunk) = slice.as_chunks::<64>();
61
let mut chunk_idx = no_nan_chunks;
62
while chunk_idx < chunks.len() {
63
let nan_mask = chunk_nan_mask(&chunks[chunk_idx]);
64
words.push(!nan_mask ^ invert_mask);
65
unset_bits += nan_mask.count_ones() as usize;
66
chunk_idx += 1;
67
68
if nan_mask == 0 {
69
// NaNs are probably rare, fast-path for skipping.
70
while chunk_idx < chunks.len() && !chunk_has_nan(&chunks[chunk_idx]) {
71
words.push(u64::MAX ^ invert_mask);
72
chunk_idx += 1
73
}
74
}
75
}
76
77
let mut last_word = 0;
78
for (i, v) in last_chunk.iter().enumerate() {
79
let is_nan = v != v;
80
last_word |= (!is_nan as u64) << i;
81
unset_bits += is_nan as usize;
82
}
83
words.push(last_word ^ invert_mask);
84
85
if invert {
86
unset_bits = slice.len() - unset_bits;
87
}
88
89
let storage = SharedStorage::from_vec(words)
90
.try_transmute::<u8>()
91
.ok()
92
.unwrap();
93
let bitmap = unsafe { Bitmap::from_inner_unchecked(storage, 0, slice.len(), Some(unset_bits)) };
94
Some(bitmap)
95
}
96
97