Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/binview/mod.rs
8402 views
1
#![allow(unsafe_op_in_unsafe_fn)]
2
//! See thread: https://lists.apache.org/thread/w88tpz76ox8h3rxkjl4so6rg3f1rv7wt
3
4
mod builder;
5
pub use builder::*;
6
mod ffi;
7
pub(super) mod fmt;
8
mod iterator;
9
mod mutable;
10
#[cfg(feature = "proptest")]
11
pub mod proptest;
12
mod view;
13
14
use std::any::Any;
15
use std::fmt::Debug;
16
use std::marker::PhantomData;
17
18
use polars_buffer::Buffer;
19
use polars_error::*;
20
use polars_utils::relaxed_cell::RelaxedCell;
21
22
use crate::array::Array;
23
use crate::bitmap::Bitmap;
24
use crate::datatypes::ArrowDataType;
25
26
mod private {
    /// Marker trait required by `ViewType`.
    ///
    /// It lives in a private module, so code outside this module cannot name it
    /// and therefore cannot add further `ViewType` implementations.
    pub trait Sealed: Send + Sync {}

    // The two element types a view array can hold.
    impl Sealed for [u8] {}
    impl Sealed for str {}
}
32
pub use iterator::BinaryViewValueIter;
33
pub use mutable::MutableBinaryViewArray;
34
use polars_utils::aliases::{InitHashMaps, PlHashMap};
35
use private::Sealed;
36
37
use crate::array::binview::view::{validate_binary_views, validate_views_utf8_only};
38
use crate::array::iterator::NonNullValuesIter;
39
use crate::bitmap::utils::{BitmapIter, ZipValidity};
40
pub type BinaryViewArray = BinaryViewArrayGeneric<[u8]>;
41
pub type Utf8ViewArray = BinaryViewArrayGeneric<str>;
42
pub type BinaryViewArrayBuilder = BinaryViewArrayGenericBuilder<[u8]>;
43
pub type Utf8ViewArrayBuilder = BinaryViewArrayGenericBuilder<str>;
44
pub use view::{View, validate_utf8_views};
45
46
use super::Splitable;
47
48
pub type MutablePlString = MutableBinaryViewArray<str>;
49
pub type MutablePlBinary = MutableBinaryViewArray<[u8]>;
50
51
static BIN_VIEW_TYPE: ArrowDataType = ArrowDataType::BinaryView;
52
static UTF8_VIEW_TYPE: ArrowDataType = ArrowDataType::Utf8View;
53
54
// Growth parameters of view array buffers.
55
const DEFAULT_BLOCK_SIZE: usize = 8 * 1024;
56
const MAX_EXP_BLOCK_SIZE: usize = 16 * 1024 * 1024;
57
58
pub trait ViewType: Sealed + 'static + PartialEq + AsRef<Self> {
59
const IS_UTF8: bool;
60
const DATA_TYPE: ArrowDataType;
61
type Owned: Debug + Clone + Sync + Send + AsRef<Self>;
62
63
/// # Safety
64
/// The caller must ensure that `slice` is a valid view.
65
unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self;
66
fn from_bytes(slice: &[u8]) -> Option<&Self>;
67
68
fn to_bytes(&self) -> &[u8];
69
70
#[allow(clippy::wrong_self_convention)]
71
fn into_owned(&self) -> Self::Owned;
72
73
fn dtype() -> &'static ArrowDataType;
74
}
75
76
impl ViewType for str {
77
const IS_UTF8: bool = true;
78
const DATA_TYPE: ArrowDataType = ArrowDataType::Utf8View;
79
type Owned = String;
80
81
#[inline(always)]
82
unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self {
83
std::str::from_utf8_unchecked(slice)
84
}
85
#[inline(always)]
86
fn from_bytes(slice: &[u8]) -> Option<&Self> {
87
std::str::from_utf8(slice).ok()
88
}
89
90
#[inline(always)]
91
fn to_bytes(&self) -> &[u8] {
92
self.as_bytes()
93
}
94
95
fn into_owned(&self) -> Self::Owned {
96
self.to_string()
97
}
98
fn dtype() -> &'static ArrowDataType {
99
&UTF8_VIEW_TYPE
100
}
101
}
102
103
impl ViewType for [u8] {
104
const IS_UTF8: bool = false;
105
const DATA_TYPE: ArrowDataType = ArrowDataType::BinaryView;
106
type Owned = Vec<u8>;
107
108
#[inline(always)]
109
unsafe fn from_bytes_unchecked(slice: &[u8]) -> &Self {
110
slice
111
}
112
#[inline(always)]
113
fn from_bytes(slice: &[u8]) -> Option<&Self> {
114
Some(slice)
115
}
116
117
#[inline(always)]
118
fn to_bytes(&self) -> &[u8] {
119
self
120
}
121
122
fn into_owned(&self) -> Self::Owned {
123
self.to_vec()
124
}
125
126
fn dtype() -> &'static ArrowDataType {
127
&BIN_VIEW_TYPE
128
}
129
}
130
131
pub struct BinaryViewArrayGeneric<T: ViewType + ?Sized> {
132
dtype: ArrowDataType,
133
views: Buffer<View>,
134
buffers: Buffer<Buffer<u8>>,
135
validity: Option<Bitmap>,
136
phantom: PhantomData<T>,
137
/// Total bytes length if we would concatenate them all.
138
total_bytes_len: RelaxedCell<u64>,
139
/// Total bytes in the buffer (excluding remaining capacity)
140
total_buffer_len: usize,
141
}
142
143
/// Two arrays are equal when they have the same length and every pair of
/// (optional) elements compares equal.
impl<T: ViewType + ?Sized> PartialEq for BinaryViewArrayGeneric<T> {
    fn eq(&self, other: &Self) -> bool {
        if self.len() != other.len() {
            return false;
        }
        let mut pairs = self.into_iter().zip(other);
        pairs.all(|(lhs, rhs)| lhs == rhs)
    }
}
148
149
impl<T: ViewType + ?Sized> Clone for BinaryViewArrayGeneric<T> {
150
fn clone(&self) -> Self {
151
Self {
152
dtype: self.dtype.clone(),
153
views: self.views.clone(),
154
buffers: self.buffers.clone(),
155
validity: self.validity.clone(),
156
phantom: Default::default(),
157
total_bytes_len: self.total_bytes_len.clone(),
158
total_buffer_len: self.total_buffer_len,
159
}
160
}
161
}
162
163
unsafe impl<T: ViewType + ?Sized> Send for BinaryViewArrayGeneric<T> {}
164
unsafe impl<T: ViewType + ?Sized> Sync for BinaryViewArrayGeneric<T> {}
165
166
const UNKNOWN_LEN: u64 = u64::MAX;
167
168
impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
169
/// # Safety
170
/// The caller must ensure
171
/// - the data is valid utf8 (if required)
172
/// - The offsets match the buffers.
173
pub unsafe fn new_unchecked(
174
dtype: ArrowDataType,
175
views: Buffer<View>,
176
buffers: Buffer<Buffer<u8>>,
177
validity: Option<Bitmap>,
178
total_bytes_len: Option<usize>,
179
total_buffer_len: usize,
180
) -> Self {
181
// Verify the invariants
182
#[cfg(debug_assertions)]
183
{
184
if let Some(validity) = validity.as_ref() {
185
assert_eq!(validity.len(), views.len());
186
}
187
188
// @TODO: Enable this. There are still some bugs but disabled temporarily to get some fixes in.
189
// let mut actual_total_buffer_len = 0;
190
// let mut actual_total_bytes_len = 0;
191
192
// for buffer in buffers.iter() {
193
// actual_total_buffer_len += buffer.len();
194
// }
195
196
for (i, view) in views.iter().enumerate() {
197
let is_valid = validity.as_ref().is_none_or(|v| v.get_bit(i));
198
199
if !is_valid {
200
continue;
201
}
202
203
// actual_total_bytes_len += view.length as usize;
204
if view.length > View::MAX_INLINE_SIZE {
205
assert!((view.buffer_idx as usize) < (buffers.len()));
206
assert!(
207
view.offset as usize + view.length as usize
208
<= buffers[view.buffer_idx as usize].len()
209
);
210
}
211
}
212
213
// assert_eq!(actual_total_buffer_len, total_buffer_len);
214
// if let Some(len) = total_bytes_len {
215
// assert_eq!(actual_total_bytes_len, len);
216
// }
217
}
218
219
Self {
220
dtype,
221
views,
222
buffers,
223
validity,
224
phantom: Default::default(),
225
total_bytes_len: RelaxedCell::from(
226
total_bytes_len.map(|l| l as u64).unwrap_or(UNKNOWN_LEN),
227
),
228
total_buffer_len,
229
}
230
}
231
232
/// Create a new BinaryViewArray but initialize a statistics compute.
233
///
234
/// # Safety
235
/// The caller must ensure the invariants
236
pub unsafe fn new_unchecked_unknown_md(
237
dtype: ArrowDataType,
238
views: Buffer<View>,
239
buffers: Buffer<Buffer<u8>>,
240
validity: Option<Bitmap>,
241
total_buffer_len: Option<usize>,
242
) -> Self {
243
let total_bytes_len = None;
244
let total_buffer_len =
245
total_buffer_len.unwrap_or_else(|| buffers.iter().map(|b| b.len()).sum());
246
Self::new_unchecked(
247
dtype,
248
views,
249
buffers,
250
validity,
251
total_bytes_len,
252
total_buffer_len,
253
)
254
}
255
256
pub fn data_buffers(&self) -> &Buffer<Buffer<u8>> {
257
&self.buffers
258
}
259
260
pub fn data_buffers_mut(&mut self) -> &mut Buffer<Buffer<u8>> {
261
&mut self.buffers
262
}
263
264
pub fn variadic_buffer_lengths(&self) -> Vec<i64> {
265
self.buffers.iter().map(|buf| buf.len() as i64).collect()
266
}
267
268
pub fn views(&self) -> &Buffer<View> {
269
&self.views
270
}
271
272
pub fn into_views(self) -> Vec<View> {
273
self.views.to_vec()
274
}
275
276
pub fn into_inner(
277
self,
278
) -> (
279
Buffer<View>,
280
Buffer<Buffer<u8>>,
281
Option<Bitmap>,
282
Option<usize>,
283
usize,
284
) {
285
let total_bytes_len = self.try_total_bytes_len();
286
let views = self.views;
287
let buffers = self.buffers;
288
let validity = self.validity;
289
290
(
291
views,
292
buffers,
293
validity,
294
total_bytes_len,
295
self.total_buffer_len,
296
)
297
}
298
299
/// Apply a function over the views. This can be used to update views in operations like slicing.
300
///
301
/// # Safety
302
/// All invariants of the views must be maintained.
303
pub unsafe fn apply_views<F: FnMut(View, &T) -> View>(&self, mut update_view: F) -> Self {
304
let arr = self.clone();
305
let (views, buffers, validity, _total_bytes_len, total_buffer_len) = arr.into_inner();
306
307
let mut total_bytes_len = 0;
308
let mut views = views.to_vec();
309
for v in views.iter_mut() {
310
let str_slice = T::from_bytes_unchecked(v.get_slice_unchecked(&buffers));
311
*v = update_view(*v, str_slice);
312
total_bytes_len += v.length as usize;
313
}
314
315
let len_valid = validity.is_none();
316
Self::new_unchecked(
317
self.dtype.clone(),
318
views.into(),
319
buffers,
320
validity,
321
len_valid.then_some(total_bytes_len),
322
total_buffer_len,
323
)
324
}
325
326
/// Apply a function to the views as a mutable slice.
327
///
328
/// # Safety
329
/// All invariants of the views must be maintained.
330
pub unsafe fn with_views_mut<F: FnOnce(&mut [View])>(&mut self, f: F) {
331
self.total_bytes_len.store(UNKNOWN_LEN);
332
if let Some(views) = self.views.get_mut_slice() {
333
f(views)
334
} else {
335
let mut views = self.views.as_slice().to_vec();
336
f(&mut views);
337
self.views = Buffer::from(views);
338
}
339
}
340
341
pub fn try_new(
342
dtype: ArrowDataType,
343
views: Buffer<View>,
344
buffers: Buffer<Buffer<u8>>,
345
validity: Option<Bitmap>,
346
) -> PolarsResult<Self> {
347
if T::IS_UTF8 {
348
validate_utf8_views(views.as_ref(), buffers.as_ref())?;
349
} else {
350
validate_binary_views(views.as_ref(), buffers.as_ref())?;
351
}
352
353
if let Some(validity) = &validity {
354
polars_ensure!(validity.len()== views.len(), ComputeError: "validity mask length must match the number of values" )
355
}
356
357
unsafe {
358
Ok(Self::new_unchecked_unknown_md(
359
dtype, views, buffers, validity, None,
360
))
361
}
362
}
363
364
/// Creates an empty [`BinaryViewArrayGeneric`], i.e. whose `.len` is zero.
365
#[inline]
366
pub fn new_empty(dtype: ArrowDataType) -> Self {
367
unsafe { Self::new_unchecked(dtype, Buffer::new(), Buffer::new(), None, Some(0), 0) }
368
}
369
370
/// Returns a new null [`BinaryViewArrayGeneric`] of `length`.
371
#[inline]
372
pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
373
let validity = Some(Bitmap::new_zeroed(length));
374
unsafe {
375
Self::new_unchecked(
376
dtype,
377
Buffer::zeroed(length),
378
Buffer::new(),
379
validity,
380
Some(0),
381
0,
382
)
383
}
384
}
385
386
/// Returns the element at index `i`
387
/// # Panics
388
/// iff `i >= self.len()`
389
#[inline]
390
pub fn value(&self, i: usize) -> &T {
391
assert!(i < self.len());
392
unsafe { self.value_unchecked(i) }
393
}
394
395
/// Returns the element at index `i`
396
///
397
/// # Safety
398
/// Assumes that the `i < self.len`.
399
#[inline]
400
pub unsafe fn value_unchecked(&self, i: usize) -> &T {
401
let v = self.views.get_unchecked(i);
402
T::from_bytes_unchecked(v.get_slice_unchecked(&self.buffers))
403
}
404
405
/// Returns the element at index `i`, or None if it is null.
406
/// # Panics
407
/// iff `i >= self.len()`
408
#[inline]
409
pub fn get(&self, i: usize) -> Option<&T> {
410
assert!(i < self.len());
411
unsafe { self.get_unchecked(i) }
412
}
413
414
/// Returns the element at index `i`, or None if it is null.
415
///
416
/// # Safety
417
/// Assumes that the `i < self.len`.
418
#[inline]
419
pub unsafe fn get_unchecked(&self, i: usize) -> Option<&T> {
420
if self
421
.validity
422
.as_ref()
423
.is_none_or(|v| v.get_bit_unchecked(i))
424
{
425
let v = self.views.get_unchecked(i);
426
Some(T::from_bytes_unchecked(
427
v.get_slice_unchecked(&self.buffers),
428
))
429
} else {
430
None
431
}
432
}
433
434
/// Returns an iterator of `Option<&T>` over every element of this array.
435
pub fn iter(&self) -> ZipValidity<&T, BinaryViewValueIter<'_, T>, BitmapIter<'_>> {
436
ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
437
}
438
439
/// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity
440
pub fn values_iter(&self) -> BinaryViewValueIter<'_, T> {
441
BinaryViewValueIter::new(self)
442
}
443
444
pub fn len_iter(&self) -> impl Iterator<Item = u32> + '_ {
445
self.views.iter().map(|v| v.length)
446
}
447
448
/// Returns an iterator of the non-null values.
449
pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BinaryViewArrayGeneric<T>> {
450
NonNullValuesIter::new(self, self.validity())
451
}
452
453
/// Returns an iterator of the non-null values.
454
pub fn non_null_views_iter(&self) -> NonNullValuesIter<'_, Buffer<View>> {
455
NonNullValuesIter::new(self.views(), self.validity())
456
}
457
458
impl_sliced!();
459
impl_into_array!();
460
461
/// Returns this array with a new validity.
462
/// # Panic
463
/// Panics iff `validity.len() != self.len()`.
464
#[must_use]
465
#[inline]
466
pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
467
self.set_validity(validity);
468
self
469
}
470
471
/// Sets the validity of this array.
472
/// # Panics
473
/// This function panics iff `values.len() != self.len()`.
474
#[inline]
475
pub fn set_validity(&mut self, validity: Option<Bitmap>) {
476
if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
477
panic!("validity must be equal to the array's length")
478
}
479
self.total_bytes_len.store(UNKNOWN_LEN);
480
self.validity = validity;
481
}
482
483
/// Takes the validity of this array, leaving it without a validity mask.
484
#[inline]
485
pub fn take_validity(&mut self) -> Option<Bitmap> {
486
self.total_bytes_len.store(UNKNOWN_LEN);
487
self.validity.take()
488
}
489
490
/// Builds an array from a slice of optional values; `None` entries become nulls.
pub fn from_slice<S: AsRef<T>, P: AsRef<[Option<S>]>>(slice: P) -> Self {
    let items = slice.as_ref().iter().map(|item| item.as_ref());
    let mutable = MutableBinaryViewArray::from_iterator(items);
    mutable.into()
}
496
497
/// Builds an array from a slice of values.
pub fn from_slice_values<S: AsRef<T>, P: AsRef<[S]>>(slice: P) -> Self {
    let values = slice.as_ref().iter().map(|value| value.as_ref());
    let mutable = MutableBinaryViewArray::from_values_iter(values);
    mutable.into()
}
502
503
/// Get the total length of bytes that it would take to concatenate all binary/str values in this array.
504
pub fn total_bytes_len(&self) -> usize {
505
let total = self.total_bytes_len.load();
506
if total == UNKNOWN_LEN {
507
let total = ZipValidity::new_with_validity(self.len_iter(), self.validity.as_ref())
508
.map(|v| v.unwrap_or(0) as usize)
509
.sum::<usize>();
510
self.total_bytes_len.store(total as u64);
511
total
512
} else {
513
total as usize
514
}
515
}
516
517
/// Like total_bytes_len() but if unavailable will not force a computation.
518
pub fn try_total_bytes_len(&self) -> Option<usize> {
519
let b = self.total_bytes_len.load();
520
(b != UNKNOWN_LEN).then_some(b as usize)
521
}
522
523
/// Get the length of bytes that are stored in the variadic buffers.
524
pub fn total_buffer_len(&self) -> usize {
525
self.total_buffer_len
526
}
527
528
fn total_unshared_buffer_len(&self) -> usize {
529
// XXX: it is O(n), not O(1).
530
// Given this function is only called in `maybe_gc()`,
531
// it may not be worthy to add an extra field for this.
532
self.buffers
533
.iter()
534
.map(|buf| {
535
if buf.storage_refcount() > 1 {
536
0
537
} else {
538
buf.len()
539
}
540
})
541
.sum()
542
}
543
544
#[inline(always)]
545
pub fn len(&self) -> usize {
546
self.views.len()
547
}
548
549
/// Garbage collect
550
pub fn gc(self) -> Self {
551
if self.buffers.is_empty() {
552
return self;
553
}
554
let mut mutable = MutableBinaryViewArray::with_capacity(self.len());
555
let buffers = self.buffers.as_ref();
556
557
for view in self.views.as_ref() {
558
unsafe { mutable.push_view_unchecked(*view, buffers) }
559
}
560
mutable.freeze().with_validity(self.validity)
561
}
562
563
/// Returns a plain clone when the buffer list and every data buffer have a
/// storage refcount of one; otherwise returns a garbage-collected clone.
pub fn deshare(&self) -> Self {
    // The refcounts must be inspected before cloning.
    let list_unshared = self.buffers.storage_refcount() == 1;
    let fully_unshared =
        list_unshared && self.buffers.iter().all(|b| b.storage_refcount() == 1);

    if fully_unshared {
        self.clone()
    } else {
        self.clone().gc()
    }
}
571
572
pub fn is_sliced(&self) -> bool {
573
!std::ptr::eq(self.views.as_ptr(), self.views.storage_ptr())
574
}
575
576
pub fn maybe_gc(self) -> Self {
577
const GC_MINIMUM_SAVINGS: usize = 16 * 1024; // At least 16 KiB.
578
579
if self.total_buffer_len <= GC_MINIMUM_SAVINGS {
580
return self;
581
}
582
583
if self.buffers.storage_refcount() != 1 {
584
// There are multiple holders of this `buffers`.
585
// If we allow gc in this case,
586
// it may end up copying the same content multiple times.
587
return self;
588
}
589
590
// Subtract the maximum amount of inlined strings to get a lower bound
591
// on the number of buffer bytes needed (assuming no dedup).
592
let total_bytes_len = self.total_bytes_len();
593
let buffer_req_lower_bound = total_bytes_len.saturating_sub(self.len() * 12);
594
595
let lower_bound_mem_usage_post_gc = self.len() * 16 + buffer_req_lower_bound;
596
// Use unshared buffer len. Shared buffer won't be freed; no savings.
597
let cur_mem_usage = self.len() * 16 + self.total_unshared_buffer_len();
598
let savings_upper_bound = cur_mem_usage.saturating_sub(lower_bound_mem_usage_post_gc);
599
600
if savings_upper_bound >= GC_MINIMUM_SAVINGS
601
&& cur_mem_usage >= 4 * lower_bound_mem_usage_post_gc
602
{
603
self.gc()
604
} else {
605
self
606
}
607
}
608
609
pub fn make_mut(self) -> MutableBinaryViewArray<T> {
610
let views = self.views.to_vec();
611
let completed_buffers = self.buffers.to_vec();
612
let validity = self.validity.map(|bitmap| bitmap.make_mut());
613
614
// We need to know the total_bytes_len if we are going to mutate it.
615
let mut total_bytes_len = self.total_bytes_len.load();
616
if total_bytes_len == UNKNOWN_LEN {
617
total_bytes_len = views.iter().map(|view| view.length as u64).sum();
618
}
619
let total_bytes_len = total_bytes_len as usize;
620
621
MutableBinaryViewArray {
622
views,
623
completed_buffers,
624
in_progress_buffer: vec![],
625
validity,
626
phantom: Default::default(),
627
total_bytes_len,
628
total_buffer_len: self.total_buffer_len,
629
stolen_buffers: PlHashMap::new(),
630
}
631
}
632
}
633
634
impl BinaryViewArray {
635
/// Validate the underlying bytes on UTF-8.
636
pub fn validate_utf8(&self) -> PolarsResult<()> {
637
// SAFETY: views are correct
638
unsafe { validate_views_utf8_only(&self.views, &self.buffers, 0) }
639
}
640
641
/// Convert [`BinaryViewArray`] to [`Utf8ViewArray`].
642
pub fn to_utf8view(&self) -> PolarsResult<Utf8ViewArray> {
643
self.validate_utf8()?;
644
unsafe { Ok(self.to_utf8view_unchecked()) }
645
}
646
647
/// Convert [`BinaryViewArray`] to [`Utf8ViewArray`] without checking UTF-8.
648
///
649
/// # Safety
650
/// The caller must ensure the underlying data is valid UTF-8.
651
pub unsafe fn to_utf8view_unchecked(&self) -> Utf8ViewArray {
652
Utf8ViewArray::new_unchecked(
653
ArrowDataType::Utf8View,
654
self.views.clone(),
655
self.buffers.clone(),
656
self.validity.clone(),
657
self.try_total_bytes_len(),
658
self.total_buffer_len,
659
)
660
}
661
}
662
663
impl Utf8ViewArray {
664
pub fn to_binview(&self) -> BinaryViewArray {
665
// SAFETY: same invariants.
666
unsafe {
667
BinaryViewArray::new_unchecked(
668
ArrowDataType::BinaryView,
669
self.views.clone(),
670
self.buffers.clone(),
671
self.validity.clone(),
672
self.try_total_bytes_len(),
673
self.total_buffer_len,
674
)
675
}
676
}
677
}
678
679
impl<T: ViewType + ?Sized> Array for BinaryViewArrayGeneric<T> {
680
fn as_any(&self) -> &dyn Any {
681
self
682
}
683
684
fn as_any_mut(&mut self) -> &mut dyn Any {
685
self
686
}
687
688
#[inline(always)]
689
fn len(&self) -> usize {
690
BinaryViewArrayGeneric::len(self)
691
}
692
693
#[inline(always)]
694
fn dtype(&self) -> &ArrowDataType {
695
&self.dtype
696
}
697
698
#[inline(always)]
699
fn dtype_mut(&mut self) -> &mut ArrowDataType {
700
&mut self.dtype
701
}
702
703
fn validity(&self) -> Option<&Bitmap> {
704
self.validity.as_ref()
705
}
706
707
fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
708
let (lhs, rhs) = Splitable::split_at(self, offset);
709
(Box::new(lhs), Box::new(rhs))
710
}
711
712
unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
713
let (lhs, rhs) = unsafe { Splitable::split_at_unchecked(self, offset) };
714
(Box::new(lhs), Box::new(rhs))
715
}
716
717
fn slice(&mut self, offset: usize, length: usize) {
718
assert!(
719
offset + length <= self.len(),
720
"the offset of the new Buffer cannot exceed the existing length"
721
);
722
unsafe { self.slice_unchecked(offset, length) }
723
}
724
725
unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
726
debug_assert!(offset + length <= self.len());
727
self.validity = self
728
.validity
729
.take()
730
.map(|bitmap| bitmap.sliced_unchecked(offset, length))
731
.filter(|bitmap| bitmap.unset_bits() > 0);
732
self.views.slice_in_place_unchecked(offset..offset + length);
733
self.total_bytes_len.store(UNKNOWN_LEN)
734
}
735
736
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
737
debug_assert!(
738
validity.as_ref().is_none_or(|v| v.len() == self.len()),
739
"{} != {}",
740
validity.as_ref().unwrap().len(),
741
self.len()
742
);
743
744
let mut new = self.clone();
745
new.validity = validity;
746
Box::new(new)
747
}
748
749
fn to_boxed(&self) -> Box<dyn Array> {
750
Box::new(self.clone())
751
}
752
}
753
754
impl<T: ViewType + ?Sized> Splitable for BinaryViewArrayGeneric<T> {
    /// A split point is valid anywhere from `0` up to and including `self.len()`.
    fn check_bound(&self, offset: usize) -> bool {
        self.len() >= offset
    }

    /// Splits views and validity at `offset`; both halves share the data buffers
    /// and report the full `total_buffer_len`.
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        let (lhs_views, rhs_views) = unsafe { self.views.split_at_unchecked(offset) };
        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };

        // Only an empty half has a known total bytes length (zero).
        let lhs_bytes_len = if offset == 0 { Some(0) } else { None };
        let rhs_bytes_len = if offset == self.len() { Some(0) } else { None };

        // SAFETY: both halves reuse views that are valid for the shared buffers.
        let lhs = unsafe {
            Self::new_unchecked(
                self.dtype.clone(),
                lhs_views,
                self.buffers.clone(),
                lhs_validity,
                lhs_bytes_len,
                self.total_buffer_len(),
            )
        };
        let rhs = unsafe {
            Self::new_unchecked(
                self.dtype.clone(),
                rhs_views,
                self.buffers.clone(),
                rhs_validity,
                rhs_bytes_len,
                self.total_buffer_len(),
            )
        };

        (lhs, rhs)
    }
}
785
786