Path: blob/main/crates/polars-arrow/src/array/binary/mutable.rs
6939 views
use std::sync::Arc;12use polars_error::{PolarsResult, polars_bail};34use super::{BinaryArray, MutableBinaryValuesArray, MutableBinaryValuesIter};5use crate::array::physical_binary::*;6use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};7use crate::bitmap::utils::{BitmapIter, ZipValidity};8use crate::bitmap::{Bitmap, MutableBitmap};9use crate::datatypes::ArrowDataType;10use crate::offset::{Offset, Offsets};11use crate::trusted_len::TrustedLen;1213/// The Arrow's equivalent to `Vec<Option<Vec<u8>>>`.14/// Converting a [`MutableBinaryArray`] into a [`BinaryArray`] is `O(1)`.15/// # Implementation16/// This struct does not allocate a validity until one is required (i.e. push a null to it).17#[derive(Debug, Clone)]18pub struct MutableBinaryArray<O: Offset> {19values: MutableBinaryValuesArray<O>,20validity: Option<MutableBitmap>,21}2223impl<O: Offset> From<MutableBinaryArray<O>> for BinaryArray<O> {24fn from(other: MutableBinaryArray<O>) -> Self {25let validity = other.validity.and_then(|x| {26let validity: Option<Bitmap> = x.into();27validity28});29let array: BinaryArray<O> = other.values.into();30array.with_validity(validity)31}32}3334impl<O: Offset> Default for MutableBinaryArray<O> {35fn default() -> Self {36Self::new()37}38}3940impl<O: Offset> MutableBinaryArray<O> {41/// Creates a new empty [`MutableBinaryArray`].42/// # Implementation43/// This allocates a [`Vec`] of one element44pub fn new() -> Self {45Self::with_capacity(0)46}4748/// Returns a [`MutableBinaryArray`] created from its internal representation.49///50/// # Errors51/// This function returns an error iff:52/// * The last offset is not equal to the values' length.53/// * the validity's length is not equal to `offsets.len()`.54/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.55/// # Implementation56/// This function is `O(1)`57pub fn try_new(58dtype: ArrowDataType,59offsets: Offsets<O>,60values: Vec<u8>,61validity: Option<MutableBitmap>,62) -> PolarsResult<Self> {63let values = MutableBinaryValuesArray::try_new(dtype, offsets, values)?;6465if validity66.as_ref()67.is_some_and(|validity| validity.len() != values.len())68{69polars_bail!(ComputeError: "validity's length must be equal to the number of values")70}7172Ok(Self { values, validity })73}7475/// Creates a new [`MutableBinaryArray`] from a slice of optional `&[u8]`.76// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.77pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {78Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))79}8081fn default_dtype() -> ArrowDataType {82BinaryArray::<O>::default_dtype()83}8485/// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots.86pub fn with_capacity(capacity: usize) -> Self {87Self::with_capacities(capacity, 0)88}8990/// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots and values.91/// # Implementation92/// This does not allocate the validity.93pub fn with_capacities(capacity: usize, values: usize) -> Self {94Self {95values: MutableBinaryValuesArray::with_capacities(capacity, values),96validity: None,97}98}99100/// Reserves `additional` elements and `additional_values` on the values buffer.101pub fn reserve(&mut self, additional: usize, additional_values: usize) {102self.values.reserve(additional, additional_values);103if let Some(x) = self.validity.as_mut() {104x.reserve(additional)105}106}107108/// Pushes a new element to the array.109/// # Panic110/// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.111pub fn push<T: AsRef<[u8]>>(&mut self, value: Option<T>) {112self.try_push(value).unwrap()113}114115/// Pop the last entry from [`MutableBinaryArray`].116/// This function returns `None` iff this array is empty117pub fn pop(&mut self) -> Option<Vec<u8>> {118let value = self.values.pop()?;119self.validity120.as_mut()121.map(|x| x.pop()?.then(|| ()))122.unwrap_or_else(|| Some(()))123.map(|_| value)124}125126fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(127iter: I,128) -> PolarsResult<Self> {129let iterator = iter.into_iter();130let (lower, _) = iterator.size_hint();131let mut primitive = Self::with_capacity(lower);132for item in iterator {133primitive.try_push(item.as_ref())?134}135Ok(primitive)136}137138fn init_validity(&mut self) {139let mut validity = MutableBitmap::with_capacity(self.values.capacity());140validity.extend_constant(self.len(), true);141validity.set(self.len() - 1, false);142self.validity = Some(validity);143}144145/// Converts itself into an [`Array`].146pub fn into_arc(self) -> Arc<dyn Array> {147let a: BinaryArray<O> = self.into();148Arc::new(a)149}150151/// Shrinks the capacity of the [`MutableBinaryArray`] to fit its current length.152pub fn shrink_to_fit(&mut self) {153self.values.shrink_to_fit();154if let Some(validity) = &mut self.validity {155validity.shrink_to_fit()156}157}158159impl_mutable_array_mut_validity!();160}161162impl<O: Offset> MutableBinaryArray<O> {163/// returns its values.164pub fn values(&self) -> &Vec<u8> {165self.values.values()166}167168/// returns its offsets.169pub fn offsets(&self) -> &Offsets<O> {170self.values.offsets()171}172173/// Returns an iterator of `Option<&[u8]>`174pub fn iter(&self) -> ZipValidity<&[u8], MutableBinaryValuesIter<'_, O>, BitmapIter<'_>> {175ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))176}177178/// Returns an iterator over the values of this array179pub fn values_iter(&self) -> MutableBinaryValuesIter<'_, O> {180self.values.iter()181}182}183184impl<O: Offset> MutableArray for MutableBinaryArray<O> {185fn len(&self) -> usize {186self.values.len()187}188189fn validity(&self) -> Option<&MutableBitmap> {190self.validity.as_ref()191}192193fn as_box(&mut self) -> Box<dyn Array> {194let array: BinaryArray<O> = std::mem::take(self).into();195array.boxed()196}197198fn as_arc(&mut self) -> Arc<dyn Array> {199let array: BinaryArray<O> = std::mem::take(self).into();200array.arced()201}202203fn dtype(&self) -> &ArrowDataType {204self.values.dtype()205}206207fn as_any(&self) -> &dyn std::any::Any {208self209}210211fn as_mut_any(&mut self) -> &mut dyn std::any::Any {212self213}214215#[inline]216fn push_null(&mut self) {217self.push::<&[u8]>(None)218}219220fn reserve(&mut self, additional: usize) {221self.reserve(additional, 0)222}223224fn shrink_to_fit(&mut self) {225self.shrink_to_fit()226}227}228229impl<O: Offset, P: AsRef<[u8]>> FromIterator<Option<P>> for MutableBinaryArray<O> {230fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {231Self::try_from_iter(iter).unwrap()232}233}234235impl<O: Offset> MutableBinaryArray<O> {236/// Creates a [`MutableBinaryArray`] from an iterator of trusted length.237///238/// # Safety239/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).240/// I.e. that `size_hint().1` correctly reports its length.241#[inline]242pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self243where244P: AsRef<[u8]>,245I: Iterator<Item = Option<P>>,246{247let (validity, offsets, values) = trusted_len_unzip(iterator);248249Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap()250}251252/// Creates a [`MutableBinaryArray`] from an iterator of trusted length.253#[inline]254pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self255where256P: AsRef<[u8]>,257I: TrustedLen<Item = Option<P>>,258{259// soundness: I is `TrustedLen`260unsafe { Self::from_trusted_len_iter_unchecked(iterator) }261}262263/// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`.264///265/// # Safety266/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).267/// I.e. that `size_hint().1` correctly reports its length.268#[inline]269pub unsafe fn from_trusted_len_values_iter_unchecked<T: AsRef<[u8]>, I: Iterator<Item = T>>(270iterator: I,271) -> Self {272let (offsets, values) = trusted_len_values_iter(iterator);273Self::try_new(Self::default_dtype(), offsets, values, None).unwrap()274}275276/// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`.277#[inline]278pub fn from_trusted_len_values_iter<T: AsRef<[u8]>, I: TrustedLen<Item = T>>(279iterator: I,280) -> Self {281// soundness: I is `TrustedLen`282unsafe { Self::from_trusted_len_values_iter_unchecked(iterator) }283}284285/// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length.286///287/// # Safety288/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).289/// I.e. that `size_hint().1` correctly reports its length.290#[inline]291pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(292iterator: I,293) -> std::result::Result<Self, E>294where295P: AsRef<[u8]>,296I: IntoIterator<Item = std::result::Result<Option<P>, E>>,297{298let iterator = iterator.into_iter();299300// soundness: assumed trusted len301let (validity, offsets, values) = try_trusted_len_unzip(iterator)?;302Ok(Self::try_new(Self::default_dtype(), offsets, values, validity).unwrap())303}304305/// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length.306#[inline]307pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>308where309P: AsRef<[u8]>,310I: TrustedLen<Item = std::result::Result<Option<P>, E>>,311{312// soundness: I: TrustedLen313unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }314}315316/// Extends the [`MutableBinaryArray`] from an iterator of trusted length.317/// This differs from `extend_trusted_len` which accepts iterator of optional values.318#[inline]319pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)320where321P: AsRef<[u8]>,322I: TrustedLen<Item = P>,323{324// SAFETY: The iterator is `TrustedLen`325unsafe { self.extend_trusted_len_values_unchecked(iterator) }326}327328/// Extends the [`MutableBinaryArray`] from an iterator of values.329/// This differs from `extended_trusted_len` which accepts iterator of optional values.330#[inline]331pub fn extend_values<I, P>(&mut self, iterator: I)332where333P: AsRef<[u8]>,334I: Iterator<Item = P>,335{336let length = self.values.len();337self.values.extend(iterator);338let additional = self.values.len() - length;339340if let Some(validity) = self.validity.as_mut() {341validity.extend_constant(additional, true);342}343}344345/// Extends the [`MutableBinaryArray`] from an `iterator` of values of trusted length.346/// This differs from `extend_trusted_len_unchecked` which accepts iterator of optional347/// values.348///349/// # Safety350/// The `iterator` must be [`TrustedLen`]351#[inline]352pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)353where354P: AsRef<[u8]>,355I: Iterator<Item = P>,356{357let length = self.values.len();358self.values.extend_trusted_len_unchecked(iterator);359let additional = self.values.len() - length;360361if let Some(validity) = self.validity.as_mut() {362validity.extend_constant(additional, true);363}364}365366/// Extends the [`MutableBinaryArray`] from an iterator of [`TrustedLen`]367#[inline]368pub fn extend_trusted_len<I, P>(&mut self, iterator: I)369where370P: AsRef<[u8]>,371I: TrustedLen<Item = Option<P>>,372{373// SAFETY: The iterator is `TrustedLen`374unsafe { self.extend_trusted_len_unchecked(iterator) }375}376377/// Extends the [`MutableBinaryArray`] from an iterator of [`TrustedLen`]378///379/// # Safety380/// The `iterator` must be [`TrustedLen`]381#[inline]382pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)383where384P: AsRef<[u8]>,385I: Iterator<Item = Option<P>>,386{387if self.validity.is_none() {388let mut validity = MutableBitmap::new();389validity.extend_constant(self.len(), true);390self.validity = Some(validity);391}392393self.values394.extend_from_trusted_len_iter(self.validity.as_mut().unwrap(), iterator);395}396397/// Creates a new [`MutableBinaryArray`] from a [`Iterator`] of `&[u8]`.398pub fn from_iter_values<T: AsRef<[u8]>, I: Iterator<Item = T>>(iterator: I) -> Self {399let (offsets, values) = values_iter(iterator);400Self::try_new(Self::default_dtype(), offsets, values, None).unwrap()401}402403/// Extend with a fallible iterator404pub fn extend_fallible<T, I, E>(&mut self, iter: I) -> std::result::Result<(), E>405where406E: std::error::Error,407I: IntoIterator<Item = std::result::Result<Option<T>, E>>,408T: AsRef<[u8]>,409{410let mut iter = iter.into_iter();411self.reserve(iter.size_hint().0, 0);412iter.try_for_each(|x| {413self.push(x?);414Ok(())415})416}417}418419impl<O: Offset, T: AsRef<[u8]>> Extend<Option<T>> for MutableBinaryArray<O> {420fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {421self.try_extend(iter).unwrap();422}423}424425impl<O: Offset, T: AsRef<[u8]>> TryExtend<Option<T>> for MutableBinaryArray<O> {426fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {427let mut iter = iter.into_iter();428self.reserve(iter.size_hint().0, 0);429iter.try_for_each(|x| self.try_push(x))430}431}432433impl<O: Offset, T: AsRef<[u8]>> TryPush<Option<T>> for MutableBinaryArray<O> {434fn try_push(&mut self, value: Option<T>) -> PolarsResult<()> {435match value {436Some(value) => {437self.values.try_push(value.as_ref())?;438439if let Some(validity) = &mut self.validity {440validity.push(true)441}442},443None => {444self.values.push("");445match &mut self.validity {446Some(validity) => validity.push(false),447None => self.init_validity(),448}449},450}451Ok(())452}453}454455impl<O: Offset> PartialEq for MutableBinaryArray<O> {456fn eq(&self, other: &Self) -> bool {457self.iter().eq(other.iter())458}459}460461impl<O: Offset> TryExtendFromSelf for MutableBinaryArray<O> {462fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {463extend_validity(self.len(), &mut self.validity, &other.validity);464465self.values.try_extend_from_self(&other.values)466}467}468469470