Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/dictionary/mutable.rs
6939 views
1
use std::hash::Hash;
2
use std::sync::Arc;
3
4
use polars_error::PolarsResult;
5
6
use super::value_map::ValueMap;
7
use super::{DictionaryArray, DictionaryKey};
8
use crate::array::indexable::{AsIndexed, Indexable};
9
use crate::array::primitive::MutablePrimitiveArray;
10
use crate::array::{Array, MutableArray, TryExtend, TryPush};
11
use crate::bitmap::MutableBitmap;
12
use crate::datatypes::ArrowDataType;
13
14
#[derive(Debug)]
15
pub struct MutableDictionaryArray<K: DictionaryKey, M: MutableArray> {
16
dtype: ArrowDataType,
17
map: ValueMap<K, M>,
18
// invariant: `max(keys) < map.values().len()`
19
keys: MutablePrimitiveArray<K>,
20
}
21
22
impl<K: DictionaryKey, M: MutableArray> From<MutableDictionaryArray<K, M>> for DictionaryArray<K> {
23
fn from(other: MutableDictionaryArray<K, M>) -> Self {
24
// SAFETY: the invariant of this struct ensures that this is up-held
25
unsafe {
26
DictionaryArray::<K>::try_new_unchecked(
27
other.dtype,
28
other.keys.into(),
29
other.map.into_values().as_box(),
30
)
31
.unwrap()
32
}
33
}
34
}
35
36
impl<K, M> MutableDictionaryArray<K, M>
where
    K: DictionaryKey,
    M: MutableArray + Default,
{
    /// Creates an empty [`MutableDictionaryArray`] whose values array is `M::default()`.
    ///
    /// # Panics
    /// Panics if [`Self::try_empty`] rejects the default values array.
    pub fn new() -> Self {
        let values = M::default();
        Self::try_empty(values).unwrap()
    }
}
42
43
impl<K, M> Default for MutableDictionaryArray<K, M>
where
    K: DictionaryKey,
    M: MutableArray + Default,
{
    /// Returns an empty array built on `M::default()`, exactly like `new()`.
    ///
    /// # Panics
    /// Panics if `try_empty` rejects the default values array.
    fn default() -> Self {
        Self::try_empty(M::default()).unwrap()
    }
}
48
49
impl<K: DictionaryKey, M: MutableArray> MutableDictionaryArray<K, M> {
50
/// Creates an empty [`MutableDictionaryArray`] from a given empty values array.
51
/// # Errors
52
/// Errors if the array is non-empty.
53
pub fn try_empty(values: M) -> PolarsResult<Self> {
54
Ok(Self::from_value_map(ValueMap::<K, M>::try_empty(values)?))
55
}
56
57
/// Creates an empty [`MutableDictionaryArray`] preloaded with a given dictionary of values.
58
/// Indices associated with those values are automatically assigned based on the order of
59
/// the values.
60
/// # Errors
61
/// Errors if there's more values than the maximum value of `K` or if values are not unique.
62
pub fn from_values(values: M) -> PolarsResult<Self>
63
where
64
M: Indexable,
65
M::Type: Eq + Hash,
66
{
67
Ok(Self::from_value_map(ValueMap::<K, M>::from_values(values)?))
68
}
69
70
fn from_value_map(value_map: ValueMap<K, M>) -> Self {
71
let keys = MutablePrimitiveArray::<K>::new();
72
let dtype =
73
ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_map.dtype().clone()), false);
74
Self {
75
dtype,
76
map: value_map,
77
keys,
78
}
79
}
80
81
/// Creates an empty [`MutableDictionaryArray`] retaining the same dictionary as the current
82
/// mutable dictionary array, but with no data. This may come useful when serializing the
83
/// array into multiple chunks, where there's a requirement that the dictionary is the same.
84
/// No copying is performed, the value map is moved over to the new array.
85
pub fn into_empty(self) -> Self {
86
Self::from_value_map(self.map)
87
}
88
89
/// Same as `into_empty` but clones the inner value map instead of taking full ownership.
90
pub fn to_empty(&self) -> Self
91
where
92
M: Clone,
93
{
94
Self::from_value_map(self.map.clone())
95
}
96
97
/// pushes a null value
98
pub fn push_null(&mut self) {
99
self.keys.push(None)
100
}
101
102
/// returns a reference to the inner values.
103
pub fn values(&self) -> &M {
104
self.map.values()
105
}
106
107
/// converts itself into [`Arc<dyn Array>`]
108
pub fn into_arc(self) -> Arc<dyn Array> {
109
let a: DictionaryArray<K> = self.into();
110
Arc::new(a)
111
}
112
113
/// converts itself into [`Box<dyn Array>`]
114
pub fn into_box(self) -> Box<dyn Array> {
115
let a: DictionaryArray<K> = self.into();
116
Box::new(a)
117
}
118
119
/// Reserves `additional` slots.
120
pub fn reserve(&mut self, additional: usize) {
121
self.keys.reserve(additional);
122
}
123
124
/// Shrinks the capacity of the [`MutableDictionaryArray`] to fit its current length.
125
pub fn shrink_to_fit(&mut self) {
126
self.map.shrink_to_fit();
127
self.keys.shrink_to_fit();
128
}
129
130
/// Returns the dictionary keys
131
pub fn keys(&self) -> &MutablePrimitiveArray<K> {
132
&self.keys
133
}
134
135
fn take_into(&mut self) -> DictionaryArray<K> {
136
DictionaryArray::<K>::try_new(
137
self.dtype.clone(),
138
std::mem::take(&mut self.keys).into(),
139
self.map.take_into(),
140
)
141
.unwrap()
142
}
143
}
144
145
impl<K: DictionaryKey, M: 'static + MutableArray> MutableArray for MutableDictionaryArray<K, M> {
146
fn len(&self) -> usize {
147
self.keys.len()
148
}
149
150
fn validity(&self) -> Option<&MutableBitmap> {
151
self.keys.validity()
152
}
153
154
fn as_box(&mut self) -> Box<dyn Array> {
155
Box::new(self.take_into())
156
}
157
158
fn as_arc(&mut self) -> Arc<dyn Array> {
159
Arc::new(self.take_into())
160
}
161
162
fn dtype(&self) -> &ArrowDataType {
163
&self.dtype
164
}
165
166
fn as_any(&self) -> &dyn std::any::Any {
167
self
168
}
169
170
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
171
self
172
}
173
174
fn push_null(&mut self) {
175
self.keys.push(None)
176
}
177
178
fn reserve(&mut self, additional: usize) {
179
self.reserve(additional)
180
}
181
182
fn shrink_to_fit(&mut self) {
183
self.shrink_to_fit()
184
}
185
}
186
187
impl<K, M, T> TryExtend<Option<T>> for MutableDictionaryArray<K, M>
188
where
189
K: DictionaryKey,
190
M: MutableArray + Indexable + TryExtend<Option<T>> + TryPush<Option<T>>,
191
T: AsIndexed<M>,
192
M::Type: Eq + Hash,
193
{
194
fn try_extend<II: IntoIterator<Item = Option<T>>>(&mut self, iter: II) -> PolarsResult<()> {
195
for value in iter {
196
if let Some(value) = value {
197
let key = self
198
.map
199
.try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
200
self.keys.try_push(Some(key))?;
201
} else {
202
self.push_null();
203
}
204
}
205
Ok(())
206
}
207
}
208
209
impl<K, M, T> TryPush<Option<T>> for MutableDictionaryArray<K, M>
210
where
211
K: DictionaryKey,
212
M: MutableArray + Indexable + TryPush<Option<T>>,
213
T: AsIndexed<M>,
214
M::Type: Eq + Hash,
215
{
216
fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {
217
if let Some(value) = item {
218
let key = self
219
.map
220
.try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
221
self.keys.try_push(Some(key))?;
222
} else {
223
self.push_null();
224
}
225
Ok(())
226
}
227
}
228
229