// Source: GitHub repository pola-rs/polars
// Path: crates/polars-arrow/src/array/binary/mutable_values.rs (branch `main`)
1
use std::sync::Arc;
2
3
use polars_error::{PolarsResult, polars_bail};
4
5
use super::{BinaryArray, MutableBinaryArray};
6
use crate::array::physical_binary::*;
7
use crate::array::specification::try_check_offsets_bounds;
8
use crate::array::{
9
Array, ArrayAccessor, ArrayValuesIter, MutableArray, TryExtend, TryExtendFromSelf, TryPush,
10
};
11
use crate::bitmap::MutableBitmap;
12
use crate::datatypes::ArrowDataType;
13
use crate::offset::{Offset, Offsets};
14
use crate::trusted_len::TrustedLen;
15
16
/// A [`MutableArray`] that builds a [`BinaryArray`]. It differs
17
/// from [`MutableBinaryArray`] in that it builds non-null [`BinaryArray`].
18
#[derive(Debug, Clone)]
19
pub struct MutableBinaryValuesArray<O: Offset> {
20
dtype: ArrowDataType,
21
offsets: Offsets<O>,
22
values: Vec<u8>,
23
}
24
25
impl<O: Offset> From<MutableBinaryValuesArray<O>> for BinaryArray<O> {
26
fn from(other: MutableBinaryValuesArray<O>) -> Self {
27
BinaryArray::<O>::new(other.dtype, other.offsets.into(), other.values.into(), None)
28
}
29
}
30
31
impl<O: Offset> From<MutableBinaryValuesArray<O>> for MutableBinaryArray<O> {
32
fn from(other: MutableBinaryValuesArray<O>) -> Self {
33
MutableBinaryArray::<O>::try_new(other.dtype, other.offsets, other.values, None)
34
.expect("MutableBinaryValuesArray is consistent with MutableBinaryArray")
35
}
36
}
37
38
impl<O: Offset> Default for MutableBinaryValuesArray<O> {
    /// Equivalent to [`MutableBinaryValuesArray::new`]: an empty array.
    fn default() -> Self {
        Self::new()
    }
}
43
44
impl<O: Offset> MutableBinaryValuesArray<O> {
45
/// Returns an empty [`MutableBinaryValuesArray`].
46
pub fn new() -> Self {
47
Self {
48
dtype: Self::default_dtype(),
49
offsets: Offsets::new(),
50
values: Vec::<u8>::new(),
51
}
52
}
53
54
/// Returns a [`MutableBinaryValuesArray`] created from its internal representation.
55
///
56
/// # Errors
57
/// This function returns an error iff:
58
/// * The last offset is not equal to the values' length.
59
/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
60
/// # Implementation
61
/// This function is `O(1)`
62
pub fn try_new(
63
dtype: ArrowDataType,
64
offsets: Offsets<O>,
65
values: Vec<u8>,
66
) -> PolarsResult<Self> {
67
try_check_offsets_bounds(&offsets, values.len())?;
68
69
if dtype.to_physical_type() != Self::default_dtype().to_physical_type() {
70
polars_bail!(ComputeError: "MutableBinaryValuesArray can only be initialized with DataType::Binary or DataType::LargeBinary",)
71
}
72
73
Ok(Self {
74
dtype,
75
offsets,
76
values,
77
})
78
}
79
80
/// Returns the default [`ArrowDataType`] of this container: [`ArrowDataType::Utf8`] or [`ArrowDataType::LargeUtf8`]
81
/// depending on the generic [`Offset`].
82
pub fn default_dtype() -> ArrowDataType {
83
BinaryArray::<O>::default_dtype()
84
}
85
86
/// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items.
87
pub fn with_capacity(capacity: usize) -> Self {
88
Self::with_capacities(capacity, 0)
89
}
90
91
/// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items and values.
92
pub fn with_capacities(capacity: usize, values: usize) -> Self {
93
Self {
94
dtype: Self::default_dtype(),
95
offsets: Offsets::<O>::with_capacity(capacity),
96
values: Vec::<u8>::with_capacity(values),
97
}
98
}
99
100
/// returns its values.
101
#[inline]
102
pub fn values(&self) -> &Vec<u8> {
103
&self.values
104
}
105
106
/// returns its offsets.
107
#[inline]
108
pub fn offsets(&self) -> &Offsets<O> {
109
&self.offsets
110
}
111
112
/// Reserves `additional` elements and `additional_values` on the values.
113
#[inline]
114
pub fn reserve(&mut self, additional: usize, additional_values: usize) {
115
self.offsets.reserve(additional);
116
self.values.reserve(additional_values);
117
}
118
119
/// Returns the capacity in number of items
120
pub fn capacity(&self) -> usize {
121
self.offsets.capacity()
122
}
123
124
/// Returns the length of this array
125
#[inline]
126
pub fn len(&self) -> usize {
127
self.offsets.len_proxy()
128
}
129
130
/// Pushes a new item to the array.
131
/// # Panic
132
/// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.
133
#[inline]
134
pub fn push<T: AsRef<[u8]>>(&mut self, value: T) {
135
self.try_push(value).unwrap()
136
}
137
138
/// Pop the last entry from [`MutableBinaryValuesArray`].
139
/// This function returns `None` iff this array is empty.
140
pub fn pop(&mut self) -> Option<Vec<u8>> {
141
if self.len() == 0 {
142
return None;
143
}
144
self.offsets.pop()?;
145
let start = self.offsets.last().to_usize();
146
let value = self.values.split_off(start);
147
Some(value.to_vec())
148
}
149
150
/// Returns the value of the element at index `i`.
151
/// # Panic
152
/// This function panics iff `i >= self.len`.
153
#[inline]
154
pub fn value(&self, i: usize) -> &[u8] {
155
assert!(i < self.len());
156
unsafe { self.value_unchecked(i) }
157
}
158
159
/// Returns the value of the element at index `i`.
160
///
161
/// # Safety
162
/// This function is safe iff `i < self.len`.
163
#[inline]
164
pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
165
// soundness: the invariant of the function
166
let (start, end) = self.offsets.start_end(i);
167
168
// soundness: the invariant of the struct
169
self.values.get_unchecked(start..end)
170
}
171
172
/// Returns an iterator of `&[u8]`
173
pub fn iter(&self) -> ArrayValuesIter<'_, Self> {
174
ArrayValuesIter::new(self)
175
}
176
177
/// Shrinks the capacity of the [`MutableBinaryValuesArray`] to fit its current length.
178
pub fn shrink_to_fit(&mut self) {
179
self.values.shrink_to_fit();
180
self.offsets.shrink_to_fit();
181
}
182
183
/// Extract the low-end APIs from the [`MutableBinaryValuesArray`].
184
pub fn into_inner(self) -> (ArrowDataType, Offsets<O>, Vec<u8>) {
185
(self.dtype, self.offsets, self.values)
186
}
187
}
188
189
impl<O: Offset> MutableArray for MutableBinaryValuesArray<O> {
190
fn len(&self) -> usize {
191
self.len()
192
}
193
194
fn validity(&self) -> Option<&MutableBitmap> {
195
None
196
}
197
198
fn as_box(&mut self) -> Box<dyn Array> {
199
let (dtype, offsets, values) = std::mem::take(self).into_inner();
200
BinaryArray::new(dtype, offsets.into(), values.into(), None).boxed()
201
}
202
203
fn as_arc(&mut self) -> Arc<dyn Array> {
204
let (dtype, offsets, values) = std::mem::take(self).into_inner();
205
BinaryArray::new(dtype, offsets.into(), values.into(), None).arced()
206
}
207
208
fn dtype(&self) -> &ArrowDataType {
209
&self.dtype
210
}
211
212
fn as_any(&self) -> &dyn std::any::Any {
213
self
214
}
215
216
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
217
self
218
}
219
220
#[inline]
221
fn push_null(&mut self) {
222
self.push::<&[u8]>(b"")
223
}
224
225
fn reserve(&mut self, additional: usize) {
226
self.reserve(additional, 0)
227
}
228
229
fn shrink_to_fit(&mut self) {
230
self.shrink_to_fit()
231
}
232
}
233
234
impl<O: Offset, P: AsRef<[u8]>> FromIterator<P> for MutableBinaryValuesArray<O> {
235
fn from_iter<I: IntoIterator<Item = P>>(iter: I) -> Self {
236
let (offsets, values) = values_iter(iter.into_iter());
237
Self::try_new(Self::default_dtype(), offsets, values).unwrap()
238
}
239
}
240
241
impl<O: Offset> MutableBinaryValuesArray<O> {
242
pub(crate) unsafe fn extend_from_trusted_len_iter<I, P>(
243
&mut self,
244
validity: &mut MutableBitmap,
245
iterator: I,
246
) where
247
P: AsRef<[u8]>,
248
I: Iterator<Item = Option<P>>,
249
{
250
extend_from_trusted_len_iter(&mut self.offsets, &mut self.values, validity, iterator);
251
}
252
253
/// Extends the [`MutableBinaryValuesArray`] from a [`TrustedLen`]
254
#[inline]
255
pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
256
where
257
P: AsRef<[u8]>,
258
I: TrustedLen<Item = P>,
259
{
260
unsafe { self.extend_trusted_len_unchecked(iterator) }
261
}
262
263
/// Extends [`MutableBinaryValuesArray`] from an iterator of trusted len.
264
///
265
/// # Safety
266
/// The iterator must be trusted len.
267
#[inline]
268
pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
269
where
270
P: AsRef<[u8]>,
271
I: Iterator<Item = P>,
272
{
273
extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);
274
}
275
276
/// Creates a [`MutableBinaryValuesArray`] from a [`TrustedLen`]
277
#[inline]
278
pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
279
where
280
P: AsRef<[u8]>,
281
I: TrustedLen<Item = P>,
282
{
283
// soundness: I is `TrustedLen`
284
unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
285
}
286
287
/// Returns a new [`MutableBinaryValuesArray`] from an iterator of trusted length.
288
///
289
/// # Safety
290
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
291
/// I.e. that `size_hint().1` correctly reports its length.
292
#[inline]
293
pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
294
where
295
P: AsRef<[u8]>,
296
I: Iterator<Item = P>,
297
{
298
let (offsets, values) = trusted_len_values_iter(iterator);
299
Self::try_new(Self::default_dtype(), offsets, values).unwrap()
300
}
301
302
/// Returns a new [`MutableBinaryValuesArray`] from an iterator.
303
/// # Error
304
/// This operation errors iff the total length in bytes on the iterator exceeds `O`'s maximum value.
305
/// (`i32::MAX` or `i64::MAX` respectively).
306
pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = P>>(iter: I) -> PolarsResult<Self> {
307
let iterator = iter.into_iter();
308
let (lower, _) = iterator.size_hint();
309
let mut array = Self::with_capacity(lower);
310
for item in iterator {
311
array.try_push(item)?;
312
}
313
Ok(array)
314
}
315
316
/// Extend with a fallible iterator
317
pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>
318
where
319
E: std::error::Error,
320
I: IntoIterator<Item = std::result::Result<T, E>>,
321
T: AsRef<[u8]>,
322
{
323
let mut iter = iter.into_iter();
324
self.reserve(iter.size_hint().0, 0);
325
iter.try_for_each(|x| {
326
self.push(x?);
327
Ok(())
328
})
329
}
330
}
331
332
impl<O: Offset, T: AsRef<[u8]>> Extend<T> for MutableBinaryValuesArray<O> {
333
fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
334
extend_from_values_iter(&mut self.offsets, &mut self.values, iter.into_iter());
335
}
336
}
337
338
impl<O: Offset, T: AsRef<[u8]>> TryExtend<T> for MutableBinaryValuesArray<O> {
339
fn try_extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> PolarsResult<()> {
340
let mut iter = iter.into_iter();
341
self.reserve(iter.size_hint().0, 0);
342
iter.try_for_each(|x| self.try_push(x))
343
}
344
}
345
346
impl<O: Offset, T: AsRef<[u8]>> TryPush<T> for MutableBinaryValuesArray<O> {
347
#[inline]
348
fn try_push(&mut self, value: T) -> PolarsResult<()> {
349
let bytes = value.as_ref();
350
self.values.extend_from_slice(bytes);
351
self.offsets.try_push(bytes.len())
352
}
353
}
354
355
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableBinaryValuesArray<O> {
    type Item = &'a [u8];

    /// Returns the element at `index` without bounds checking.
    ///
    /// # Safety
    /// Same contract as the inherent `value_unchecked` this forwards to:
    /// `index` must be smaller than `self.len()`.
    #[inline]
    unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
        // Method-call syntax picks the inherent `value_unchecked`, not this trait method.
        self.value_unchecked(index)
    }

    /// Number of elements; forwards to the inherent `len`.
    #[inline]
    fn len(&self) -> usize {
        self.len()
    }
}
368
369
impl<O: Offset> TryExtendFromSelf for MutableBinaryValuesArray<O> {
370
fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
371
self.values.extend_from_slice(&other.values);
372
self.offsets.try_extend_from_self(&other.offsets)
373
}
374
}
375
376