Path: blob/main/crates/polars-arrow/src/array/binary/builder.rs
6939 views
use polars_utils::IdxSize;12use crate::array::BinaryArray;3use crate::array::builder::{ShareStrategy, StaticArrayBuilder};4use crate::bitmap::OptBitmapBuilder;5use crate::buffer::Buffer;6use crate::datatypes::ArrowDataType;7use crate::offset::{Offset, Offsets, OffsetsBuffer};89pub struct BinaryArrayBuilder<O: Offset> {10dtype: ArrowDataType,11offsets: Offsets<O>,12values: Vec<u8>,13validity: OptBitmapBuilder,14}1516impl<O: Offset> BinaryArrayBuilder<O> {17pub fn new(dtype: ArrowDataType) -> Self {18Self {19dtype,20offsets: Offsets::new(),21values: Vec::new(),22validity: OptBitmapBuilder::default(),23}24}25}2627impl<O: Offset> StaticArrayBuilder for BinaryArrayBuilder<O> {28type Array = BinaryArray<O>;2930fn dtype(&self) -> &ArrowDataType {31&self.dtype32}3334fn reserve(&mut self, additional: usize) {35self.offsets.reserve(additional);36self.validity.reserve(additional);37// No values reserve, we have no idea how large it needs to be.38}3940fn freeze(self) -> BinaryArray<O> {41let offsets = OffsetsBuffer::from(self.offsets);42let values = Buffer::from(self.values);43let validity = self.validity.into_opt_validity();44BinaryArray::new(self.dtype, offsets, values, validity)45}4647fn freeze_reset(&mut self) -> Self::Array {48let offsets = OffsetsBuffer::from(core::mem::take(&mut self.offsets));49let values = Buffer::from(core::mem::take(&mut self.values));50let validity = core::mem::take(&mut self.validity).into_opt_validity();51BinaryArray::new(self.dtype.clone(), offsets, values, validity)52}5354fn len(&self) -> usize {55self.offsets.len_proxy()56}5758fn extend_nulls(&mut self, length: usize) {59self.offsets.extend_constant(length);60self.validity.extend_constant(length, false);61}6263fn subslice_extend(64&mut self,65other: &BinaryArray<O>,66start: usize,67length: usize,68_share: ShareStrategy,69) {70let start_offset = other.offsets()[start].to_usize();71let stop_offset = other.offsets()[start + length].to_usize();72self.offsets73.try_extend_from_slice(other.offsets(), start, length)74.unwrap();75self.values76.extend_from_slice(&other.values()[start_offset..stop_offset]);77self.validity78.subslice_extend_from_opt_validity(other.validity(), start, length);79}8081fn subslice_extend_each_repeated(82&mut self,83other: &BinaryArray<O>,84start: usize,85length: usize,86repeats: usize,87_share: ShareStrategy,88) {89let other_offsets = other.offsets();90let other_values = &**other.values();9192let start_offset = other.offsets()[start].to_usize();93let stop_offset = other.offsets()[start + length].to_usize();94self.offsets.reserve(length * repeats);95self.values.reserve((stop_offset - start_offset) * repeats);96for offset_idx in start..start + length {97let substring_start = other_offsets[offset_idx].to_usize();98let substring_stop = other_offsets[offset_idx + 1].to_usize();99for _ in 0..repeats {100self.offsets101.try_push(substring_stop - substring_start)102.unwrap();103self.values104.extend_from_slice(&other_values[substring_start..substring_stop]);105}106}107self.validity108.subslice_extend_each_repeated_from_opt_validity(109other.validity(),110start,111length,112repeats,113);114}115116unsafe fn gather_extend(117&mut self,118other: &BinaryArray<O>,119idxs: &[IdxSize],120_share: ShareStrategy,121) {122let other_values = &**other.values();123let other_offsets = other.offsets();124125// Pre-compute proper length for reserve.126let total_len: usize = idxs127.iter()128.map(|i| {129let start_offset = other_offsets.get_unchecked(*i as usize).to_usize();130let stop_offset = other_offsets.get_unchecked(*i as usize + 1).to_usize();131stop_offset - start_offset132})133.sum();134self.values.reserve(total_len);135136for idx in idxs {137let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();138let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();139self.values140.extend_from_slice(other_values.get_unchecked(start_offset..stop_offset));141}142143self.validity144.gather_extend_from_opt_validity(other.validity(), idxs);145}146147fn opt_gather_extend(148&mut self,149other: &BinaryArray<O>,150idxs: &[IdxSize],151_share: ShareStrategy,152) {153let other_values = &**other.values();154let other_offsets = other.offsets();155156unsafe {157// Pre-compute proper length for reserve.158let total_len: usize = idxs159.iter()160.map(|idx| {161if (*idx as usize) < other.len() {162let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();163let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();164stop_offset - start_offset165} else {1660167}168})169.sum();170self.values.reserve(total_len);171172for idx in idxs {173let start_offset = other_offsets.get_unchecked(*idx as usize).to_usize();174let stop_offset = other_offsets.get_unchecked(*idx as usize + 1).to_usize();175self.values176.extend_from_slice(other_values.get_unchecked(start_offset..stop_offset));177}178179self.validity180.opt_gather_extend_from_opt_validity(other.validity(), idxs, other.len());181}182}183}184185186