Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/list/mutable.rs
6939 views
1
use std::sync::Arc;
2
3
use polars_error::{PolarsResult, polars_err};
4
use polars_utils::pl_str::PlSmallStr;
5
6
use super::ListArray;
7
use crate::array::physical_binary::extend_validity;
8
use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
9
use crate::bitmap::MutableBitmap;
10
use crate::datatypes::{ArrowDataType, Field};
11
use crate::offset::{Offset, Offsets};
12
use crate::trusted_len::TrustedLen;
13
14
/// The mutable version of [`ListArray`].
15
#[derive(Debug, Clone)]
16
pub struct MutableListArray<O: Offset, M: MutableArray> {
17
dtype: ArrowDataType,
18
offsets: Offsets<O>,
19
values: M,
20
validity: Option<MutableBitmap>,
21
}
22
23
impl<O: Offset, M: MutableArray + Default> MutableListArray<O, M> {
24
/// Creates a new empty [`MutableListArray`].
25
pub fn new() -> Self {
26
let values = M::default();
27
let dtype = ListArray::<O>::default_datatype(values.dtype().clone());
28
Self::new_from(values, dtype, 0)
29
}
30
31
/// Creates a new [`MutableListArray`] with a capacity.
32
pub fn with_capacity(capacity: usize) -> Self {
33
let values = M::default();
34
let dtype = ListArray::<O>::default_datatype(values.dtype().clone());
35
36
let offsets = Offsets::<O>::with_capacity(capacity);
37
Self {
38
dtype,
39
offsets,
40
values,
41
validity: None,
42
}
43
}
44
}
45
46
impl<O: Offset, M: MutableArray + Default> Default for MutableListArray<O, M> {
47
fn default() -> Self {
48
Self::new()
49
}
50
}
51
52
impl<O: Offset, M: MutableArray> From<MutableListArray<O, M>> for ListArray<O> {
53
fn from(mut other: MutableListArray<O, M>) -> Self {
54
ListArray::new(
55
other.dtype,
56
other.offsets.into(),
57
other.values.as_box(),
58
other.validity.map(|x| x.into()),
59
)
60
}
61
}
62
63
impl<O, M, I, T> TryExtend<Option<I>> for MutableListArray<O, M>
64
where
65
O: Offset,
66
M: MutableArray + TryExtend<Option<T>>,
67
I: IntoIterator<Item = Option<T>>,
68
{
69
fn try_extend<II: IntoIterator<Item = Option<I>>>(&mut self, iter: II) -> PolarsResult<()> {
70
let iter = iter.into_iter();
71
self.reserve(iter.size_hint().0);
72
for items in iter {
73
self.try_push(items)?;
74
}
75
Ok(())
76
}
77
}
78
79
impl<O, M, I, T> TryPush<Option<I>> for MutableListArray<O, M>
80
where
81
O: Offset,
82
M: MutableArray + TryExtend<Option<T>>,
83
I: IntoIterator<Item = Option<T>>,
84
{
85
#[inline]
86
fn try_push(&mut self, item: Option<I>) -> PolarsResult<()> {
87
if let Some(items) = item {
88
let values = self.mut_values();
89
values.try_extend(items)?;
90
self.try_push_valid()?;
91
} else {
92
self.push_null();
93
}
94
Ok(())
95
}
96
}
97
98
impl<O, M> TryExtendFromSelf for MutableListArray<O, M>
99
where
100
O: Offset,
101
M: MutableArray + TryExtendFromSelf,
102
{
103
fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
104
extend_validity(self.len(), &mut self.validity, &other.validity);
105
106
self.values.try_extend_from_self(&other.values)?;
107
self.offsets.try_extend_from_self(&other.offsets)
108
}
109
}
110
111
impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
112
/// Creates a new [`MutableListArray`] from a [`MutableArray`] and capacity.
113
pub fn new_from(values: M, dtype: ArrowDataType, capacity: usize) -> Self {
114
let offsets = Offsets::<O>::with_capacity(capacity);
115
assert_eq!(values.len(), 0);
116
ListArray::<O>::get_child_field(&dtype);
117
Self {
118
dtype,
119
offsets,
120
values,
121
validity: None,
122
}
123
}
124
125
/// Creates a new [`MutableListArray`] from a [`MutableArray`].
126
pub fn new_with_field(values: M, name: PlSmallStr, nullable: bool) -> Self {
127
let field = Box::new(Field::new(name, values.dtype().clone(), nullable));
128
let dtype = if O::IS_LARGE {
129
ArrowDataType::LargeList(field)
130
} else {
131
ArrowDataType::List(field)
132
};
133
Self::new_from(values, dtype, 0)
134
}
135
136
/// Creates a new [`MutableListArray`] from a [`MutableArray`] and capacity.
137
pub fn new_with_capacity(values: M, capacity: usize) -> Self {
138
let dtype = ListArray::<O>::default_datatype(values.dtype().clone());
139
Self::new_from(values, dtype, capacity)
140
}
141
142
/// Creates a new [`MutableListArray`] from a [`MutableArray`], [`Offsets`] and
143
/// [`MutableBitmap`].
144
pub fn new_from_mutable(
145
values: M,
146
offsets: Offsets<O>,
147
validity: Option<MutableBitmap>,
148
) -> Self {
149
assert_eq!(values.len(), offsets.last().to_usize());
150
let dtype = ListArray::<O>::default_datatype(values.dtype().clone());
151
Self {
152
dtype,
153
offsets,
154
values,
155
validity,
156
}
157
}
158
159
#[inline]
160
/// Needs to be called when a valid value was extended to this array.
161
/// This is a relatively low level function, prefer `try_push` when you can.
162
pub fn try_push_valid(&mut self) -> PolarsResult<()> {
163
let total_length = self.values.len();
164
let offset = self.offsets.last().to_usize();
165
let length = total_length
166
.checked_sub(offset)
167
.ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
168
169
self.offsets.try_push(length)?;
170
if let Some(validity) = &mut self.validity {
171
validity.push(true)
172
}
173
Ok(())
174
}
175
176
#[inline]
177
fn push_null(&mut self) {
178
self.offsets.extend_constant(1);
179
match &mut self.validity {
180
Some(validity) => validity.push(false),
181
None => self.init_validity(),
182
}
183
}
184
185
/// Expand this array, using elements from the underlying backing array.
186
/// Assumes the expansion begins at the highest previous offset, or zero if
187
/// this [`MutableListArray`] is currently empty.
188
///
189
/// Panics if:
190
/// - the new offsets are not in monotonic increasing order.
191
/// - any new offset is not in bounds of the backing array.
192
/// - the passed iterator has no upper bound.
193
pub fn try_extend_from_lengths<II>(&mut self, iterator: II) -> PolarsResult<()>
194
where
195
II: TrustedLen<Item = Option<usize>> + Clone,
196
{
197
self.offsets
198
.try_extend_from_lengths(iterator.clone().map(|x| x.unwrap_or_default()))?;
199
if let Some(validity) = &mut self.validity {
200
validity.extend_from_trusted_len_iter(iterator.map(|x| x.is_some()))
201
}
202
assert_eq!(self.offsets.last().to_usize(), self.values.len());
203
Ok(())
204
}
205
206
/// Returns the length of this array
207
#[inline]
208
pub fn len(&self) -> usize {
209
self.offsets.len_proxy()
210
}
211
212
/// The values
213
pub fn mut_values(&mut self) -> &mut M {
214
&mut self.values
215
}
216
217
/// The offsets
218
pub fn offsets(&self) -> &Offsets<O> {
219
&self.offsets
220
}
221
222
/// The values
223
pub fn values(&self) -> &M {
224
&self.values
225
}
226
227
fn init_validity(&mut self) {
228
let len = self.offsets.len_proxy();
229
230
let mut validity = MutableBitmap::with_capacity(self.offsets.capacity());
231
validity.extend_constant(len, true);
232
validity.set(len - 1, false);
233
self.validity = Some(validity)
234
}
235
236
/// Converts itself into an [`Array`].
237
pub fn into_arc(self) -> Arc<dyn Array> {
238
let a: ListArray<O> = self.into();
239
Arc::new(a)
240
}
241
242
/// converts itself into [`Box<dyn Array>`]
243
pub fn into_box(self) -> Box<dyn Array> {
244
let a: ListArray<O> = self.into();
245
Box::new(a)
246
}
247
248
/// Reserves `additional` slots.
249
pub fn reserve(&mut self, additional: usize) {
250
self.offsets.reserve(additional);
251
if let Some(x) = self.validity.as_mut() {
252
x.reserve(additional)
253
}
254
}
255
256
/// Shrinks the capacity of the [`MutableListArray`] to fit its current length.
257
pub fn shrink_to_fit(&mut self) {
258
self.values.shrink_to_fit();
259
self.offsets.shrink_to_fit();
260
if let Some(validity) = &mut self.validity {
261
validity.shrink_to_fit()
262
}
263
}
264
}
265
266
impl<O: Offset, M: MutableArray + 'static> MutableArray for MutableListArray<O, M> {
267
fn len(&self) -> usize {
268
MutableListArray::len(self)
269
}
270
271
fn validity(&self) -> Option<&MutableBitmap> {
272
self.validity.as_ref()
273
}
274
275
fn as_box(&mut self) -> Box<dyn Array> {
276
ListArray::new(
277
self.dtype.clone(),
278
std::mem::take(&mut self.offsets).into(),
279
self.values.as_box(),
280
std::mem::take(&mut self.validity).map(|x| x.into()),
281
)
282
.boxed()
283
}
284
285
fn as_arc(&mut self) -> Arc<dyn Array> {
286
ListArray::new(
287
self.dtype.clone(),
288
std::mem::take(&mut self.offsets).into(),
289
self.values.as_box(),
290
std::mem::take(&mut self.validity).map(|x| x.into()),
291
)
292
.arced()
293
}
294
295
fn dtype(&self) -> &ArrowDataType {
296
&self.dtype
297
}
298
299
fn as_any(&self) -> &dyn std::any::Any {
300
self
301
}
302
303
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
304
self
305
}
306
307
#[inline]
308
fn push_null(&mut self) {
309
self.push_null()
310
}
311
312
fn reserve(&mut self, additional: usize) {
313
self.reserve(additional)
314
}
315
316
fn shrink_to_fit(&mut self) {
317
self.shrink_to_fit();
318
}
319
}
320
321