Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/dictionary/mutable.rs
8382 views
1
use std::hash::Hash;
2
use std::sync::Arc;
3
4
use polars_error::PolarsResult;
5
6
use super::value_map::ValueMap;
7
use super::{DictionaryArray, DictionaryKey};
8
use crate::array::indexable::{AsIndexed, Indexable};
9
use crate::array::primitive::MutablePrimitiveArray;
10
use crate::array::{Array, MutableArray, TryExtend, TryPush};
11
use crate::bitmap::MutableBitmap;
12
use crate::datatypes::ArrowDataType;
13
14
#[derive(Debug)]
15
pub struct MutableDictionaryArray<K: DictionaryKey, M: MutableArray> {
16
dtype: ArrowDataType,
17
map: ValueMap<K, M>,
18
// invariant: `max(keys) < map.values().len()`
19
keys: MutablePrimitiveArray<K>,
20
}
21
22
impl<K: DictionaryKey, M: MutableArray> From<MutableDictionaryArray<K, M>> for DictionaryArray<K> {
23
fn from(other: MutableDictionaryArray<K, M>) -> Self {
24
// SAFETY: the invariant of this struct ensures that this is up-held
25
unsafe {
26
DictionaryArray::<K>::try_new_unchecked(
27
other.dtype,
28
other.keys.into(),
29
other.map.into_values().as_box(),
30
)
31
.unwrap()
32
}
33
}
34
}
35
36
impl<K: DictionaryKey, M: MutableArray + Default> MutableDictionaryArray<K, M> {
37
/// Creates an empty [`MutableDictionaryArray`].
38
pub fn new() -> Self {
39
Self::try_empty(M::default()).unwrap()
40
}
41
42
/// Creates an empty [`MutableDictionaryArray`] with the given value dtype.
43
pub fn empty_with_value_dtype(value_dtype: ArrowDataType) -> Self {
44
let keys = MutablePrimitiveArray::<K>::new();
45
let dtype = ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_dtype), false);
46
Self {
47
dtype,
48
map: ValueMap::<K, M>::try_empty(M::default()).unwrap(),
49
keys,
50
}
51
}
52
}
53
54
impl<K, M> Default for MutableDictionaryArray<K, M>
where
    K: DictionaryKey,
    M: MutableArray + Default,
{
    /// Equivalent to [`MutableDictionaryArray::new`].
    fn default() -> Self {
        Self::new()
    }
}
59
60
impl<K: DictionaryKey, M: MutableArray> MutableDictionaryArray<K, M> {
61
/// Creates an empty [`MutableDictionaryArray`] from a given empty values array.
62
/// # Errors
63
/// Errors if the array is non-empty.
64
pub fn try_empty(values: M) -> PolarsResult<Self> {
65
Ok(Self::from_value_map(ValueMap::<K, M>::try_empty(values)?))
66
}
67
68
/// Creates an empty [`MutableDictionaryArray`] preloaded with a given dictionary of values.
69
/// Indices associated with those values are automatically assigned based on the order of
70
/// the values.
71
/// # Errors
72
/// Errors if there's more values than the maximum value of `K` or if values are not unique.
73
pub fn from_values(values: M) -> PolarsResult<Self>
74
where
75
M: Indexable,
76
M::Type: Eq + Hash,
77
{
78
Ok(Self::from_value_map(ValueMap::<K, M>::from_values(values)?))
79
}
80
81
fn from_value_map(value_map: ValueMap<K, M>) -> Self {
82
let keys = MutablePrimitiveArray::<K>::new();
83
let dtype =
84
ArrowDataType::Dictionary(K::KEY_TYPE, Box::new(value_map.dtype().clone()), false);
85
Self {
86
dtype,
87
map: value_map,
88
keys,
89
}
90
}
91
92
/// Creates an empty [`MutableDictionaryArray`] retaining the same dictionary as the current
93
/// mutable dictionary array, but with no data. This may come useful when serializing the
94
/// array into multiple chunks, where there's a requirement that the dictionary is the same.
95
/// No copying is performed, the value map is moved over to the new array.
96
pub fn into_empty(self) -> Self {
97
Self::from_value_map(self.map)
98
}
99
100
/// Same as `into_empty` but clones the inner value map instead of taking full ownership.
101
pub fn to_empty(&self) -> Self
102
where
103
M: Clone,
104
{
105
Self::from_value_map(self.map.clone())
106
}
107
108
/// pushes a null value
109
pub fn push_null(&mut self) {
110
self.keys.push(None)
111
}
112
113
/// returns a reference to the inner values.
114
pub fn values(&self) -> &M {
115
self.map.values()
116
}
117
118
/// converts itself into [`Arc<dyn Array>`]
119
pub fn into_arc(self) -> Arc<dyn Array> {
120
let a: DictionaryArray<K> = self.into();
121
Arc::new(a)
122
}
123
124
/// converts itself into [`Box<dyn Array>`]
125
pub fn into_box(self) -> Box<dyn Array> {
126
let a: DictionaryArray<K> = self.into();
127
Box::new(a)
128
}
129
130
/// Reserves `additional` slots.
131
pub fn reserve(&mut self, additional: usize) {
132
self.keys.reserve(additional);
133
}
134
135
/// Shrinks the capacity of the [`MutableDictionaryArray`] to fit its current length.
136
pub fn shrink_to_fit(&mut self) {
137
self.map.shrink_to_fit();
138
self.keys.shrink_to_fit();
139
}
140
141
/// Returns the dictionary keys
142
pub fn keys(&self) -> &MutablePrimitiveArray<K> {
143
&self.keys
144
}
145
146
fn take_into(&mut self) -> DictionaryArray<K> {
147
DictionaryArray::<K>::try_new(
148
self.dtype.clone(),
149
std::mem::take(&mut self.keys).into(),
150
self.map.take_into(),
151
)
152
.unwrap()
153
}
154
}
155
156
impl<K: DictionaryKey, M: 'static + MutableArray> MutableArray for MutableDictionaryArray<K, M> {
157
fn len(&self) -> usize {
158
self.keys.len()
159
}
160
161
fn validity(&self) -> Option<&MutableBitmap> {
162
self.keys.validity()
163
}
164
165
fn as_box(&mut self) -> Box<dyn Array> {
166
Box::new(self.take_into())
167
}
168
169
fn as_arc(&mut self) -> Arc<dyn Array> {
170
Arc::new(self.take_into())
171
}
172
173
fn dtype(&self) -> &ArrowDataType {
174
&self.dtype
175
}
176
177
fn as_any(&self) -> &dyn std::any::Any {
178
self
179
}
180
181
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
182
self
183
}
184
185
fn push_null(&mut self) {
186
self.keys.push(None)
187
}
188
189
fn reserve(&mut self, additional: usize) {
190
self.reserve(additional)
191
}
192
193
fn shrink_to_fit(&mut self) {
194
self.shrink_to_fit()
195
}
196
}
197
198
impl<K, M, T> TryExtend<Option<T>> for MutableDictionaryArray<K, M>
199
where
200
K: DictionaryKey,
201
M: MutableArray + Indexable + TryExtend<Option<T>> + TryPush<Option<T>>,
202
T: AsIndexed<M>,
203
M::Type: Eq + Hash,
204
{
205
fn try_extend<II: IntoIterator<Item = Option<T>>>(&mut self, iter: II) -> PolarsResult<()> {
206
for value in iter {
207
if let Some(value) = value {
208
let key = self
209
.map
210
.try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
211
self.keys.try_push(Some(key))?;
212
} else {
213
self.push_null();
214
}
215
}
216
Ok(())
217
}
218
}
219
220
impl<K, M, T> TryPush<Option<T>> for MutableDictionaryArray<K, M>
221
where
222
K: DictionaryKey,
223
M: MutableArray + Indexable + TryPush<Option<T>>,
224
T: AsIndexed<M>,
225
M::Type: Eq + Hash,
226
{
227
fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {
228
if let Some(value) = item {
229
let key = self
230
.map
231
.try_push_valid(value, |arr, v| arr.try_push(Some(v)))?;
232
self.keys.try_push(Some(key))?;
233
} else {
234
self.push_null();
235
}
236
Ok(())
237
}
238
}
239
240