Path: blob/main/crates/polars-arrow/src/array/binary/mutable_values.rs
6939 views
use std::sync::Arc;12use polars_error::{PolarsResult, polars_bail};34use super::{BinaryArray, MutableBinaryArray};5use crate::array::physical_binary::*;6use crate::array::specification::try_check_offsets_bounds;7use crate::array::{8Array, ArrayAccessor, ArrayValuesIter, MutableArray, TryExtend, TryExtendFromSelf, TryPush,9};10use crate::bitmap::MutableBitmap;11use crate::datatypes::ArrowDataType;12use crate::offset::{Offset, Offsets};13use crate::trusted_len::TrustedLen;1415/// A [`MutableArray`] that builds a [`BinaryArray`]. It differs16/// from [`MutableBinaryArray`] in that it builds non-null [`BinaryArray`].17#[derive(Debug, Clone)]18pub struct MutableBinaryValuesArray<O: Offset> {19dtype: ArrowDataType,20offsets: Offsets<O>,21values: Vec<u8>,22}2324impl<O: Offset> From<MutableBinaryValuesArray<O>> for BinaryArray<O> {25fn from(other: MutableBinaryValuesArray<O>) -> Self {26BinaryArray::<O>::new(other.dtype, other.offsets.into(), other.values.into(), None)27}28}2930impl<O: Offset> From<MutableBinaryValuesArray<O>> for MutableBinaryArray<O> {31fn from(other: MutableBinaryValuesArray<O>) -> Self {32MutableBinaryArray::<O>::try_new(other.dtype, other.offsets, other.values, None)33.expect("MutableBinaryValuesArray is consistent with MutableBinaryArray")34}35}3637impl<O: Offset> Default for MutableBinaryValuesArray<O> {38fn default() -> Self {39Self::new()40}41}4243impl<O: Offset> MutableBinaryValuesArray<O> {44/// Returns an empty [`MutableBinaryValuesArray`].45pub fn new() -> Self {46Self {47dtype: Self::default_dtype(),48offsets: Offsets::new(),49values: Vec::<u8>::new(),50}51}5253/// Returns a [`MutableBinaryValuesArray`] created from its internal representation.54///55/// # Errors56/// This function returns an error iff:57/// * The last offset is not equal to the values' length.58/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.59/// # Implementation60/// This function is `O(1)`61pub fn try_new(62dtype: ArrowDataType,63offsets: Offsets<O>,64values: Vec<u8>,65) -> PolarsResult<Self> {66try_check_offsets_bounds(&offsets, values.len())?;6768if dtype.to_physical_type() != Self::default_dtype().to_physical_type() {69polars_bail!(ComputeError: "MutableBinaryValuesArray can only be initialized with DataType::Binary or DataType::LargeBinary",)70}7172Ok(Self {73dtype,74offsets,75values,76})77}7879/// Returns the default [`ArrowDataType`] of this container: [`ArrowDataType::Utf8`] or [`ArrowDataType::LargeUtf8`]80/// depending on the generic [`Offset`].81pub fn default_dtype() -> ArrowDataType {82BinaryArray::<O>::default_dtype()83}8485/// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items.86pub fn with_capacity(capacity: usize) -> Self {87Self::with_capacities(capacity, 0)88}8990/// Initializes a new [`MutableBinaryValuesArray`] with a pre-allocated capacity of items and values.91pub fn with_capacities(capacity: usize, values: usize) -> Self {92Self {93dtype: Self::default_dtype(),94offsets: Offsets::<O>::with_capacity(capacity),95values: Vec::<u8>::with_capacity(values),96}97}9899/// returns its values.100#[inline]101pub fn values(&self) -> &Vec<u8> {102&self.values103}104105/// returns its offsets.106#[inline]107pub fn offsets(&self) -> &Offsets<O> {108&self.offsets109}110111/// Reserves `additional` elements and `additional_values` on the values.112#[inline]113pub fn reserve(&mut self, additional: usize, additional_values: usize) {114self.offsets.reserve(additional);115self.values.reserve(additional_values);116}117118/// Returns the capacity in number of items119pub fn capacity(&self) -> usize {120self.offsets.capacity()121}122123/// Returns the length of this array124#[inline]125pub fn len(&self) -> usize {126self.offsets.len_proxy()127}128129/// Pushes a new item to the array.130/// # Panic131/// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.132#[inline]133pub fn push<T: AsRef<[u8]>>(&mut self, value: T) {134self.try_push(value).unwrap()135}136137/// Pop the last entry from [`MutableBinaryValuesArray`].138/// This function returns `None` iff this array is empty.139pub fn pop(&mut self) -> Option<Vec<u8>> {140if self.len() == 0 {141return None;142}143self.offsets.pop()?;144let start = self.offsets.last().to_usize();145let value = self.values.split_off(start);146Some(value.to_vec())147}148149/// Returns the value of the element at index `i`.150/// # Panic151/// This function panics iff `i >= self.len`.152#[inline]153pub fn value(&self, i: usize) -> &[u8] {154assert!(i < self.len());155unsafe { self.value_unchecked(i) }156}157158/// Returns the value of the element at index `i`.159///160/// # Safety161/// This function is safe iff `i < self.len`.162#[inline]163pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {164// soundness: the invariant of the function165let (start, end) = self.offsets.start_end(i);166167// soundness: the invariant of the struct168self.values.get_unchecked(start..end)169}170171/// Returns an iterator of `&[u8]`172pub fn iter(&self) -> ArrayValuesIter<'_, Self> {173ArrayValuesIter::new(self)174}175176/// Shrinks the capacity of the [`MutableBinaryValuesArray`] to fit its current length.177pub fn shrink_to_fit(&mut self) {178self.values.shrink_to_fit();179self.offsets.shrink_to_fit();180}181182/// Extract the low-end APIs from the [`MutableBinaryValuesArray`].183pub fn into_inner(self) -> (ArrowDataType, Offsets<O>, Vec<u8>) {184(self.dtype, self.offsets, self.values)185}186}187188impl<O: Offset> MutableArray for MutableBinaryValuesArray<O> {189fn len(&self) -> usize {190self.len()191}192193fn validity(&self) -> Option<&MutableBitmap> {194None195}196197fn as_box(&mut self) -> Box<dyn Array> {198let (dtype, offsets, values) = std::mem::take(self).into_inner();199BinaryArray::new(dtype, offsets.into(), values.into(), None).boxed()200}201202fn as_arc(&mut self) -> Arc<dyn Array> {203let (dtype, offsets, values) = std::mem::take(self).into_inner();204BinaryArray::new(dtype, offsets.into(), values.into(), None).arced()205}206207fn dtype(&self) -> &ArrowDataType {208&self.dtype209}210211fn as_any(&self) -> &dyn std::any::Any {212self213}214215fn as_mut_any(&mut self) -> &mut dyn std::any::Any {216self217}218219#[inline]220fn push_null(&mut self) {221self.push::<&[u8]>(b"")222}223224fn reserve(&mut self, additional: usize) {225self.reserve(additional, 0)226}227228fn shrink_to_fit(&mut self) {229self.shrink_to_fit()230}231}232233impl<O: Offset, P: AsRef<[u8]>> FromIterator<P> for MutableBinaryValuesArray<O> {234fn from_iter<I: IntoIterator<Item = P>>(iter: I) -> Self {235let (offsets, values) = values_iter(iter.into_iter());236Self::try_new(Self::default_dtype(), offsets, values).unwrap()237}238}239240impl<O: Offset> MutableBinaryValuesArray<O> {241pub(crate) unsafe fn extend_from_trusted_len_iter<I, P>(242&mut self,243validity: &mut MutableBitmap,244iterator: I,245) where246P: AsRef<[u8]>,247I: Iterator<Item = Option<P>>,248{249extend_from_trusted_len_iter(&mut self.offsets, &mut self.values, validity, iterator);250}251252/// Extends the [`MutableBinaryValuesArray`] from a [`TrustedLen`]253#[inline]254pub fn extend_trusted_len<I, P>(&mut self, iterator: I)255where256P: AsRef<[u8]>,257I: TrustedLen<Item = P>,258{259unsafe { self.extend_trusted_len_unchecked(iterator) }260}261262/// Extends [`MutableBinaryValuesArray`] from an iterator of trusted len.263///264/// # Safety265/// The iterator must be trusted len.266#[inline]267pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)268where269P: AsRef<[u8]>,270I: Iterator<Item = P>,271{272extend_from_trusted_len_values_iter(&mut self.offsets, &mut self.values, iterator);273}274275/// Creates a [`MutableBinaryValuesArray`] from a [`TrustedLen`]276#[inline]277pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self278where279P: AsRef<[u8]>,280I: TrustedLen<Item = P>,281{282// soundness: I is `TrustedLen`283unsafe { Self::from_trusted_len_iter_unchecked(iterator) }284}285286/// Returns a new [`MutableBinaryValuesArray`] from an iterator of trusted length.287///288/// # Safety289/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).290/// I.e. that `size_hint().1` correctly reports its length.291#[inline]292pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self293where294P: AsRef<[u8]>,295I: Iterator<Item = P>,296{297let (offsets, values) = trusted_len_values_iter(iterator);298Self::try_new(Self::default_dtype(), offsets, values).unwrap()299}300301/// Returns a new [`MutableBinaryValuesArray`] from an iterator.302/// # Error303/// This operation errors iff the total length in bytes on the iterator exceeds `O`'s maximum value.304/// (`i32::MAX` or `i64::MAX` respectively).305pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = P>>(iter: I) -> PolarsResult<Self> {306let iterator = iter.into_iter();307let (lower, _) = iterator.size_hint();308let mut array = Self::with_capacity(lower);309for item in iterator {310array.try_push(item)?;311}312Ok(array)313}314315/// Extend with a fallible iterator316pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>317where318E: std::error::Error,319I: IntoIterator<Item = std::result::Result<T, E>>,320T: AsRef<[u8]>,321{322let mut iter = iter.into_iter();323self.reserve(iter.size_hint().0, 0);324iter.try_for_each(|x| {325self.push(x?);326Ok(())327})328}329}330331impl<O: Offset, T: AsRef<[u8]>> Extend<T> for MutableBinaryValuesArray<O> {332fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {333extend_from_values_iter(&mut self.offsets, &mut self.values, iter.into_iter());334}335}336337impl<O: Offset, T: AsRef<[u8]>> TryExtend<T> for MutableBinaryValuesArray<O> {338fn try_extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> PolarsResult<()> {339let mut iter = iter.into_iter();340self.reserve(iter.size_hint().0, 0);341iter.try_for_each(|x| self.try_push(x))342}343}344345impl<O: Offset, T: AsRef<[u8]>> TryPush<T> for MutableBinaryValuesArray<O> {346#[inline]347fn try_push(&mut self, value: T) -> PolarsResult<()> {348let bytes = value.as_ref();349self.values.extend_from_slice(bytes);350self.offsets.try_push(bytes.len())351}352}353354unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableBinaryValuesArray<O> {355type Item = &'a [u8];356357#[inline]358unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {359self.value_unchecked(index)360}361362#[inline]363fn len(&self) -> usize {364self.len()365}366}367368impl<O: Offset> TryExtendFromSelf for MutableBinaryValuesArray<O> {369fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {370self.values.extend_from_slice(&other.values);371self.offsets.try_extend_from_self(&other.offsets)372}373}374375376