Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/binary/mutable.rs
6939 views
1
use std::sync::Arc;
2
3
use polars_error::{PolarsResult, polars_bail};
4
5
use super::{BinaryArray, MutableBinaryValuesArray, MutableBinaryValuesIter};
6
use crate::array::physical_binary::*;
7
use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
8
use crate::bitmap::utils::{BitmapIter, ZipValidity};
9
use crate::bitmap::{Bitmap, MutableBitmap};
10
use crate::datatypes::ArrowDataType;
11
use crate::offset::{Offset, Offsets};
12
use crate::trusted_len::TrustedLen;
13
14
/// The Arrow's equivalent to `Vec<Option<Vec<u8>>>`.
15
/// Converting a [`MutableBinaryArray`] into a [`BinaryArray`] is `O(1)`.
16
/// # Implementation
17
/// This struct does not allocate a validity until one is required (i.e. push a null to it).
18
#[derive(Debug, Clone)]
19
pub struct MutableBinaryArray<O: Offset> {
20
values: MutableBinaryValuesArray<O>,
21
validity: Option<MutableBitmap>,
22
}
23
24
impl<O: Offset> From<MutableBinaryArray<O>> for BinaryArray<O> {
25
fn from(other: MutableBinaryArray<O>) -> Self {
26
let validity = other.validity.and_then(|x| {
27
let validity: Option<Bitmap> = x.into();
28
validity
29
});
30
let array: BinaryArray<O> = other.values.into();
31
array.with_validity(validity)
32
}
33
}
34
35
impl<O: Offset> Default for MutableBinaryArray<O> {
36
fn default() -> Self {
37
Self::new()
38
}
39
}
40
41
impl<O: Offset> MutableBinaryArray<O> {
42
/// Creates a new empty [`MutableBinaryArray`].
43
/// # Implementation
44
/// This allocates a [`Vec`] of one element
45
pub fn new() -> Self {
46
Self::with_capacity(0)
47
}
48
49
/// Returns a [`MutableBinaryArray`] created from its internal representation.
50
///
51
/// # Errors
52
/// This function returns an error iff:
53
/// * The last offset is not equal to the values' length.
54
/// * the validity's length is not equal to `offsets.len()`.
55
/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
56
/// # Implementation
57
/// This function is `O(1)`
58
pub fn try_new(
59
dtype: ArrowDataType,
60
offsets: Offsets<O>,
61
values: Vec<u8>,
62
validity: Option<MutableBitmap>,
63
) -> PolarsResult<Self> {
64
let values = MutableBinaryValuesArray::try_new(dtype, offsets, values)?;
65
66
if validity
67
.as_ref()
68
.is_some_and(|validity| validity.len() != values.len())
69
{
70
polars_bail!(ComputeError: "validity's length must be equal to the number of values")
71
}
72
73
Ok(Self { values, validity })
74
}
75
76
/// Creates a new [`MutableBinaryArray`] from a slice of optional `&[u8]`.
77
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
78
pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
79
Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
80
}
81
82
fn default_dtype() -> ArrowDataType {
83
BinaryArray::<O>::default_dtype()
84
}
85
86
/// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots.
87
pub fn with_capacity(capacity: usize) -> Self {
88
Self::with_capacities(capacity, 0)
89
}
90
91
/// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots and values.
92
/// # Implementation
93
/// This does not allocate the validity.
94
pub fn with_capacities(capacity: usize, values: usize) -> Self {
95
Self {
96
values: MutableBinaryValuesArray::with_capacities(capacity, values),
97
validity: None,
98
}
99
}
100
101
/// Reserves `additional` elements and `additional_values` on the values buffer.
102
pub fn reserve(&mut self, additional: usize, additional_values: usize) {
103
self.values.reserve(additional, additional_values);
104
if let Some(x) = self.validity.as_mut() {
105
x.reserve(additional)
106
}
107
}
108
109
/// Pushes a new element to the array.
110
/// # Panic
111
/// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.
112
pub fn push<T: AsRef<[u8]>>(&mut self, value: Option<T>) {
113
self.try_push(value).unwrap()
114
}
115
116
/// Pop the last entry from [`MutableBinaryArray`].
117
/// This function returns `None` iff this array is empty
118
pub fn pop(&mut self) -> Option<Vec<u8>> {
119
let value = self.values.pop()?;
120
self.validity
121
.as_mut()
122
.map(|x| x.pop()?.then(|| ()))
123
.unwrap_or_else(|| Some(()))
124
.map(|_| value)
125
}
126
127
fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
128
iter: I,
129
) -> PolarsResult<Self> {
130
let iterator = iter.into_iter();
131
let (lower, _) = iterator.size_hint();
132
let mut primitive = Self::with_capacity(lower);
133
for item in iterator {
134
primitive.try_push(item.as_ref())?
135
}
136
Ok(primitive)
137
}
138
139
fn init_validity(&mut self) {
140
let mut validity = MutableBitmap::with_capacity(self.values.capacity());
141
validity.extend_constant(self.len(), true);
142
validity.set(self.len() - 1, false);
143
self.validity = Some(validity);
144
}
145
146
/// Converts itself into an [`Array`].
147
pub fn into_arc(self) -> Arc<dyn Array> {
148
let a: BinaryArray<O> = self.into();
149
Arc::new(a)
150
}
151
152
/// Shrinks the capacity of the [`MutableBinaryArray`] to fit its current length.
153
pub fn shrink_to_fit(&mut self) {
154
self.values.shrink_to_fit();
155
if let Some(validity) = &mut self.validity {
156
validity.shrink_to_fit()
157
}
158
}
159
160
impl_mutable_array_mut_validity!();
161
}
162
163
impl<O: Offset> MutableBinaryArray<O> {
164
/// returns its values.
165
pub fn values(&self) -> &Vec<u8> {
166
self.values.values()
167
}
168
169
/// returns its offsets.
170
pub fn offsets(&self) -> &Offsets<O> {
171
self.values.offsets()
172
}
173
174
/// Returns an iterator of `Option<&[u8]>`
175
pub fn iter(&self) -> ZipValidity<&[u8], MutableBinaryValuesIter<'_, O>, BitmapIter<'_>> {
176
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
177
}
178
179
/// Returns an iterator over the values of this array
180
pub fn values_iter(&self) -> MutableBinaryValuesIter<'_, O> {
181
self.values.iter()
182
}
183
}
184
185
impl<O: Offset> MutableArray for MutableBinaryArray<O> {
186
fn len(&self) -> usize {
187
self.values.len()
188
}
189
190
fn validity(&self) -> Option<&MutableBitmap> {
191
self.validity.as_ref()
192
}
193
194
fn as_box(&mut self) -> Box<dyn Array> {
195
let array: BinaryArray<O> = std::mem::take(self).into();
196
array.boxed()
197
}
198
199
fn as_arc(&mut self) -> Arc<dyn Array> {
200
let array: BinaryArray<O> = std::mem::take(self).into();
201
array.arced()
202
}
203
204
fn dtype(&self) -> &ArrowDataType {
205
self.values.dtype()
206
}
207
208
fn as_any(&self) -> &dyn std::any::Any {
209
self
210
}
211
212
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
213
self
214
}
215
216
#[inline]
217
fn push_null(&mut self) {
218
self.push::<&[u8]>(None)
219
}
220
221
fn reserve(&mut self, additional: usize) {
222
self.reserve(additional, 0)
223
}
224
225
fn shrink_to_fit(&mut self) {
226
self.shrink_to_fit()
227
}
228
}
229
230
impl<O: Offset, P: AsRef<[u8]>> FromIterator<Option<P>> for MutableBinaryArray<O> {
    /// Collects optional byte strings into a [`MutableBinaryArray`].
    ///
    /// # Panics
    /// Panics if pushing any item fails (the result of `try_from_iter` is unwrapped).
    fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
        let collected = Self::try_from_iter(iter);
        collected.unwrap()
    }
}
235
236
impl<O: Offset> MutableBinaryArray<O> {
237
/// Creates a [`MutableBinaryArray`] from an iterator of trusted length.
238
///
239
/// # Safety
240
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
241
/// I.e. that `size_hint().1` correctly reports its length.
242
#[inline]
243
pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
244
where
245
P: AsRef<[u8]>,
246
I: Iterator<Item = Option<P>>,
247
{
248
let (validity, offsets, values) = trusted_len_unzip(iterator);
249
250
Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap()
251
}
252
253
/// Creates a [`MutableBinaryArray`] from an iterator of trusted length.
254
#[inline]
255
pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
256
where
257
P: AsRef<[u8]>,
258
I: TrustedLen<Item = Option<P>>,
259
{
260
// soundness: I is `TrustedLen`
261
unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
262
}
263
264
/// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`.
265
///
266
/// # Safety
267
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
268
/// I.e. that `size_hint().1` correctly reports its length.
269
#[inline]
270
pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<[u8]>, I: Iterator<Item = T>>(
271
iterator: I,
272
) -> Self {
273
let (offsets, values) = trusted_len_values_iter(iterator);
274
Self::try_new(Self::default_dtype(), offsets, values, None).unwrap()
275
}
276
277
/// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`.
278
#[inline]
279
pub fn from_trusted_len_values_iter<T: AsRef<[u8]>, I: TrustedLen<Item = T>>(
280
iterator: I,
281
) -> Self {
282
// soundness: I is `TrustedLen`
283
unsafe { Self::from_trusted_len_values_iter_unchecked(iterator) }
284
}
285
286
/// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length.
287
///
288
/// # Safety
289
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
290
/// I.e. that `size_hint().1` correctly reports its length.
291
#[inline]
292
pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
293
iterator: I,
294
) -> std::result::Result<Self, E>
295
where
296
P: AsRef<[u8]>,
297
I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
298
{
299
let iterator = iterator.into_iter();
300
301
// soundness: assumed trusted len
302
let (validity, offsets, values) = try_trusted_len_unzip(iterator)?;
303
Ok(Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap())
304
}
305
306
/// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length.
307
#[inline]
308
pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
309
where
310
P: AsRef<[u8]>,
311
I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
312
{
313
// soundness: I: TrustedLen
314
unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
315
}
316
317
/// Extends the [`MutableBinaryArray`] from an iterator of trusted length.
318
/// This differs from `extend_trusted_len` which accepts iterator of optional values.
319
#[inline]
320
pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
321
where
322
P: AsRef<[u8]>,
323
I: TrustedLen<Item = P>,
324
{
325
// SAFETY: The iterator is `TrustedLen`
326
unsafe { self.extend_trusted_len_values_unchecked(iterator) }
327
}
328
329
/// Extends the [`MutableBinaryArray`] from an iterator of values.
330
/// This differs from `extended_trusted_len` which accepts iterator of optional values.
331
#[inline]
332
pub fn extend_values<I, P>(&mut self, iterator: I)
333
where
334
P: AsRef<[u8]>,
335
I: Iterator<Item = P>,
336
{
337
let length = self.values.len();
338
self.values.extend(iterator);
339
let additional = self.values.len() - length;
340
341
if let Some(validity) = self.validity.as_mut() {
342
validity.extend_constant(additional, true);
343
}
344
}
345
346
/// Extends the [`MutableBinaryArray`] from an `iterator` of values of trusted length.
347
/// This differs from `extend_trusted_len_unchecked` which accepts iterator of optional
348
/// values.
349
///
350
/// # Safety
351
/// The `iterator` must be [`TrustedLen`]
352
#[inline]
353
pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
354
where
355
P: AsRef<[u8]>,
356
I: Iterator<Item = P>,
357
{
358
let length = self.values.len();
359
self.values.extend_trusted_len_unchecked(iterator);
360
let additional = self.values.len() - length;
361
362
if let Some(validity) = self.validity.as_mut() {
363
validity.extend_constant(additional, true);
364
}
365
}
366
367
/// Extends the [`MutableBinaryArray`] from an iterator of [`TrustedLen`]
368
#[inline]
369
pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
370
where
371
P: AsRef<[u8]>,
372
I: TrustedLen<Item = Option<P>>,
373
{
374
// SAFETY: The iterator is `TrustedLen`
375
unsafe { self.extend_trusted_len_unchecked(iterator) }
376
}
377
378
/// Extends the [`MutableBinaryArray`] from an iterator of [`TrustedLen`]
379
///
380
/// # Safety
381
/// The `iterator` must be [`TrustedLen`]
382
#[inline]
383
pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
384
where
385
P: AsRef<[u8]>,
386
I: Iterator<Item = Option<P>>,
387
{
388
if self.validity.is_none() {
389
let mut validity = MutableBitmap::new();
390
validity.extend_constant(self.len(), true);
391
self.validity = Some(validity);
392
}
393
394
self.values
395
.extend_from_trusted_len_iter(self.validity.as_mut().unwrap(), iterator);
396
}
397
398
/// Creates a new [`MutableBinaryArray`] from a [`Iterator`] of `&[u8]`.
399
pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>(iterator: I) -> Self {
400
let (offsets, values) = values_iter(iterator);
401
Self::try_new(Self::default_dtype(), offsets, values, None).unwrap()
402
}
403
404
/// Extend with a fallible iterator
405
pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
406
where
407
E: std::error::Error,
408
I: IntoIterator<Item = std::result::Result<Option<T>, E>>,
409
T: AsRef<[u8]>,
410
{
411
let mut iter = iter.into_iter();
412
self.reserve(iter.size_hint().0, 0);
413
iter.try_for_each(|x| {
414
self.push(x?);
415
Ok(())
416
})
417
}
418
}
419
420
impl<O: Offset, T: AsRef<[u8]>> Extend<Option<T>> for MutableBinaryArray<O> {
    /// Appends every item of `iter`.
    ///
    /// # Panics
    /// Panics if `try_extend` returns an error.
    fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
        let outcome = self.try_extend(iter);
        outcome.unwrap();
    }
}
425
426
impl<O: Offset, T: AsRef<[u8]>> TryExtend<Option<T>> for MutableBinaryArray<O> {
427
fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {
428
let mut iter = iter.into_iter();
429
self.reserve(iter.size_hint().0, 0);
430
iter.try_for_each(|x| self.try_push(x))
431
}
432
}
433
434
impl<O: Offset, T: AsRef<[u8]>> TryPush<Option<T>> for MutableBinaryArray<O> {
435
fn try_push(&mut self, value: Option<T>) -> PolarsResult<()> {
436
match value {
437
Some(value) => {
438
self.values.try_push(value.as_ref())?;
439
440
if let Some(validity) = &mut self.validity {
441
validity.push(true)
442
}
443
},
444
None => {
445
self.values.push("");
446
match &mut self.validity {
447
Some(validity) => validity.push(false),
448
None => self.init_validity(),
449
}
450
},
451
}
452
Ok(())
453
}
454
}
455
456
impl<O: Offset> PartialEq for MutableBinaryArray<O> {
    /// Two arrays are equal when their `iter()` sequences of `Option<&[u8]>`
    /// are equal element by element.
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.iter();
        let rhs = other.iter();
        Iterator::eq(lhs, rhs)
    }
}
461
462
impl<O: Offset> TryExtendFromSelf for MutableBinaryArray<O> {
463
fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
464
extend_validity(self.len(), &mut self.validity, &other.validity);
465
466
self.values.try_extend_from_self(&other.values)
467
}
468
}
469
470