Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/dictionary/value_map.rs
6939 views
1
use std::borrow::Borrow;
2
use std::fmt::{self, Debug};
3
use std::hash::{BuildHasher, Hash};
4
5
use hashbrown::HashTable;
6
use hashbrown::hash_table::Entry;
7
use polars_error::{PolarsResult, polars_bail, polars_err};
8
use polars_utils::aliases::PlRandomState;
9
10
use super::DictionaryKey;
11
use crate::array::indexable::{AsIndexed, Indexable};
12
use crate::array::{Array, MutableArray};
13
use crate::datatypes::ArrowDataType;
14
15
#[derive(Clone)]
16
pub struct ValueMap<K: DictionaryKey, M: MutableArray> {
17
pub values: M,
18
pub map: HashTable<(u64, K)>,
19
random_state: PlRandomState,
20
}
21
22
impl<K: DictionaryKey, M: MutableArray> ValueMap<K, M> {
23
pub fn try_empty(values: M) -> PolarsResult<Self> {
24
if !values.is_empty() {
25
polars_bail!(ComputeError: "initializing value map with non-empty values array")
26
}
27
Ok(Self {
28
values,
29
map: HashTable::default(),
30
random_state: PlRandomState::default(),
31
})
32
}
33
34
pub fn from_values(values: M) -> PolarsResult<Self>
35
where
36
M: Indexable,
37
M::Type: Eq + Hash,
38
{
39
let mut map: HashTable<(u64, K)> = HashTable::with_capacity(values.len());
40
let random_state = PlRandomState::default();
41
for index in 0..values.len() {
42
let key = K::try_from(index).map_err(|_| polars_err!(ComputeError: "overflow"))?;
43
// SAFETY: we only iterate within bounds
44
let value = unsafe { values.value_unchecked_at(index) };
45
let hash = random_state.hash_one(value.borrow());
46
47
let entry = map.entry(
48
hash,
49
|(_h, key)| {
50
// SAFETY: invariant of the struct, it's always in bounds.
51
let stored_value = unsafe { values.value_unchecked_at(key.as_usize()) };
52
stored_value.borrow() == value.borrow()
53
},
54
|(h, _key)| *h,
55
);
56
match entry {
57
Entry::Occupied(_) => {
58
polars_bail!(InvalidOperation: "duplicate value in dictionary values array")
59
},
60
Entry::Vacant(entry) => {
61
entry.insert((hash, key));
62
},
63
}
64
}
65
Ok(Self {
66
values,
67
map,
68
random_state,
69
})
70
}
71
72
pub fn dtype(&self) -> &ArrowDataType {
73
self.values.dtype()
74
}
75
76
pub fn into_values(self) -> M {
77
self.values
78
}
79
80
pub fn take_into(&mut self) -> Box<dyn Array> {
81
let arr = self.values.as_box();
82
self.map.clear();
83
arr
84
}
85
86
#[inline]
87
pub fn values(&self) -> &M {
88
&self.values
89
}
90
91
/// Try to insert a value and return its index (it may or may not get inserted).
92
pub fn try_push_valid<V>(
93
&mut self,
94
value: V,
95
mut push: impl FnMut(&mut M, V) -> PolarsResult<()>,
96
) -> PolarsResult<K>
97
where
98
M: Indexable,
99
V: AsIndexed<M>,
100
M::Type: Eq + Hash,
101
{
102
let hash = self.random_state.hash_one(value.as_indexed());
103
let entry = self.map.entry(
104
hash,
105
|(_h, key)| {
106
// SAFETY: invariant of the struct, it's always in bounds.
107
let stored_value = unsafe { self.values.value_unchecked_at(key.as_usize()) };
108
stored_value.borrow() == value.as_indexed()
109
},
110
|(h, _key)| *h,
111
);
112
let out = match entry {
113
Entry::Occupied(entry) => entry.get().1,
114
Entry::Vacant(entry) => {
115
let index = self.values.len();
116
let key = K::try_from(index).map_err(|_| polars_err!(ComputeError: "overflow"))?;
117
entry.insert((hash, key));
118
push(&mut self.values, value)?;
119
debug_assert_eq!(self.values.len(), index + 1);
120
key
121
},
122
};
123
Ok(out)
124
}
125
126
pub fn shrink_to_fit(&mut self) {
127
self.values.shrink_to_fit();
128
}
129
}
130
131
impl<K: DictionaryKey, M: MutableArray> Debug for ValueMap<K, M> {
132
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
133
self.values.fmt(f)
134
}
135
}
136
137