Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/unique/mod.rs
6939 views
1
use arrow::array::Array;
2
3
/// Kernel to calculate the number of unique elements where the elements are already sorted.
4
pub trait SortedUniqueKernel: Array {
5
/// Calculate the set of unique elements in `fst` and `others` and fold the result into one
6
/// array.
7
fn unique_fold<'a>(fst: &'a Self, others: impl Iterator<Item = &'a Self>) -> Self;
8
9
/// Calculate the set of unique elements in [`Self`] where we have no further information about
10
/// `self`.
11
fn unique(&self) -> Self;
12
13
/// Calculate the number of unique elements in [`Self`]
14
///
15
/// A null is also considered a unique value
16
fn n_unique(&self) -> usize;
17
18
/// Calculate the number of unique non-null elements in [`Self`]
19
fn n_unique_non_null(&self) -> usize;
20
}
21
22
/// Optimized kernel to calculate the unique elements of an array.
23
///
24
/// This kernel is a specialized for where all values are known to be in some small range of
25
/// values. In this case, you can usually get by with a bitset and bit-arithmetic instead of using
26
/// vectors and hashsets. Consequently, this kernel is usually called when further information is
27
/// known about the underlying array.
28
///
29
/// This trait is not implemented directly on the `Array` as with many other kernels. Rather, it is
30
/// implemented on a `State` struct to which `Array`s can be appended. This allows for sharing of
31
/// `State` between many chunks and allows for different implementations for the same array (e.g. a
32
/// maintain order and no maintain-order variant).
33
pub trait RangedUniqueKernel {
34
type Array: Array;
35
36
/// Returns whether all the values in the whole range are in the state
37
fn has_seen_all(&self) -> bool;
38
39
/// Append an `Array`'s values to the `State`
40
fn append(&mut self, array: &Self::Array);
41
42
/// Append another state into the `State`
43
fn append_state(&mut self, other: &Self);
44
45
/// Consume the state to get the unique elements
46
fn finalize_unique(self) -> Self::Array;
47
/// Consume the state to get the number of unique elements including null
48
fn finalize_n_unique(&self) -> usize;
49
/// Consume the state to get the number of unique elements excluding null
50
fn finalize_n_unique_non_null(&self) -> usize;
51
}
52
53
/// A generic unique kernel that selects the generally applicable unique kernel for an `Array`.
54
pub trait GenericUniqueKernel {
55
/// Calculate the set of unique elements
56
fn unique(&self) -> Self;
57
/// Calculate the number of unique elements including null
58
fn n_unique(&self) -> usize;
59
/// Calculate the number of unique elements excluding null
60
fn n_unique_non_null(&self) -> usize;
61
}
62
63
mod boolean;
64
mod dictionary;
65
mod primitive;
66
67
pub use boolean::BooleanUniqueKernelState;
68
pub use dictionary::DictionaryRangedUniqueState;
69
pub use primitive::PrimitiveRangedUniqueState;
70
71