Path: blob/main/crates/polars-arrow/src/array/binview/mutable.rs
8363 views
use std::any::Any;1use std::fmt::{Debug, Formatter};2use std::ops::Deref;34use hashbrown::hash_map::Entry;5use polars_buffer::Buffer;6use polars_error::PolarsResult;7use polars_utils::aliases::{InitHashMaps, PlHashMap};89use crate::array::binview::iterator::MutableBinaryViewValueIter;10use crate::array::binview::view::validate_views_utf8_only;11use crate::array::binview::{12BinaryViewArrayGeneric, DEFAULT_BLOCK_SIZE, MAX_EXP_BLOCK_SIZE, ViewType,13};14use crate::array::{Array, MutableArray, TryExtend, TryPush, View};15use crate::bitmap::MutableBitmap;16use crate::datatypes::ArrowDataType;17use crate::legacy::trusted_len::TrustedLenPush;18use crate::trusted_len::TrustedLen;1920// Invariants:21//22// - Each view must point to a valid slice of a buffer23// - `total_buffer_len` must be equal to `completed_buffers.iter().map(Vec::len).sum()`24// - `total_bytes_len` must be equal to `views.iter().map(View::len).sum()`25pub struct MutableBinaryViewArray<T: ViewType + ?Sized> {26pub(crate) views: Vec<View>,27pub(crate) completed_buffers: Vec<Buffer<u8>>,28pub(crate) in_progress_buffer: Vec<u8>,29pub(crate) validity: Option<MutableBitmap>,30pub(crate) phantom: std::marker::PhantomData<T>,31/// Total bytes length if we would concatenate them all.32pub(crate) total_bytes_len: usize,33/// Total bytes in the buffer (excluding remaining capacity)34pub(crate) total_buffer_len: usize,35/// Mapping from `Buffer::deref()` to index in `completed_buffers`.36/// Used in `push_view()`.37pub(crate) stolen_buffers: PlHashMap<usize, u32>,38}3940impl<T: ViewType + ?Sized> Clone for MutableBinaryViewArray<T> {41fn clone(&self) -> Self {42Self {43views: self.views.clone(),44completed_buffers: self.completed_buffers.clone(),45in_progress_buffer: self.in_progress_buffer.clone(),46validity: self.validity.clone(),47phantom: Default::default(),48total_bytes_len: self.total_bytes_len,49total_buffer_len: self.total_buffer_len,50stolen_buffers: PlHashMap::new(),51}52}53}5455impl<T: ViewType + ?Sized> Debug for MutableBinaryViewArray<T> {56fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {57write!(f, "mutable-binview{:?}", T::DATA_TYPE)58}59}6061impl<T: ViewType + ?Sized> Default for MutableBinaryViewArray<T> {62fn default() -> Self {63Self::with_capacity(0)64}65}6667impl<T: ViewType + ?Sized> From<MutableBinaryViewArray<T>> for BinaryViewArrayGeneric<T> {68fn from(mut value: MutableBinaryViewArray<T>) -> Self {69value.finish_in_progress();70unsafe {71Self::new_unchecked(72T::DATA_TYPE,73value.views.into(),74Buffer::from(value.completed_buffers),75value.validity.map(|b| b.into()),76Some(value.total_bytes_len),77value.total_buffer_len,78)79}80}81}8283impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {84pub fn new() -> Self {85Self::default()86}8788pub fn with_capacity(capacity: usize) -> Self {89Self {90views: Vec::with_capacity(capacity),91completed_buffers: vec![],92in_progress_buffer: vec![],93validity: None,94phantom: Default::default(),95total_buffer_len: 0,96total_bytes_len: 0,97stolen_buffers: PlHashMap::new(),98}99}100101/// Get a mutable reference to the [`Vec`] of [`View`]s in this [`MutableBinaryViewArray`].102///103/// # Safety104///105/// This is safe as long as any mutation of the [`Vec`] does not break any invariants of the106/// [`MutableBinaryViewArray`] before it is read again.107#[inline]108pub unsafe fn views_mut(&mut self) -> &mut Vec<View> {109&mut self.views110}111112/// Set the `total_bytes_len` of the [`MutableBinaryViewArray`]113///114/// # Safety115///116/// This should not break invariants of the [`MutableBinaryViewArray`]117#[inline]118pub unsafe fn set_total_bytes_len(&mut self, value: usize) {119#[cfg(debug_assertions)]120{121let actual_length: usize = self.views().iter().map(|v| v.length as usize).sum();122assert_eq!(value, actual_length);123}124125self.total_bytes_len = value;126}127128pub fn total_bytes_len(&self) -> usize {129self.total_bytes_len130}131132pub fn total_buffer_len(&self) -> usize {133self.total_buffer_len134}135136#[inline]137pub fn views(&self) -> &[View] {138&self.views139}140141#[inline]142pub fn completed_buffers(&self) -> &[Buffer<u8>] {143&self.completed_buffers144}145146pub fn validity(&mut self) -> Option<&mut MutableBitmap> {147self.validity.as_mut()148}149150/// Reserves `additional` elements and `additional_buffer` on the buffer.151pub fn reserve(&mut self, additional: usize) {152self.views.reserve(additional);153}154155#[inline]156pub fn len(&self) -> usize {157self.views.len()158}159160#[inline]161pub fn capacity(&self) -> usize {162self.views.capacity()163}164165fn init_validity(&mut self, unset_last: bool) {166let mut validity = MutableBitmap::with_capacity(self.views.capacity());167validity.extend_constant(self.len(), true);168if unset_last {169validity.set(self.len() - 1, false);170}171self.validity = Some(validity);172}173174/// # Safety175/// - caller must allocate enough capacity176/// - caller must ensure the view and buffers match.177/// - The array must not have validity.178pub(crate) unsafe fn push_view_unchecked(&mut self, v: View, buffers: &[Buffer<u8>]) {179let len = v.length;180if len <= View::MAX_INLINE_SIZE {181debug_assert!(self.views.capacity() > self.views.len());182self.views.push_unchecked(v);183self.total_bytes_len += len as usize;184} else {185let data = buffers.get_unchecked(v.buffer_idx as usize);186let offset = v.offset as usize;187let bytes = data.get_unchecked(offset..offset + len as usize);188let t = T::from_bytes_unchecked(bytes);189self.push_value_ignore_validity(t)190}191}192193/// # Safety194/// - caller must allocate enough capacity195/// - caller must ensure the view and buffers match.196/// - The array must not have validity.197/// - caller must not mix use this function with other push functions.198pub unsafe fn push_view_unchecked_dedupe(&mut self, mut v: View, buffers: &[Buffer<u8>]) {199let len = v.length;200self.total_bytes_len += len as usize;201if len <= View::MAX_INLINE_SIZE {202self.views.push_unchecked(v);203} else {204let buffer = buffers.get_unchecked(v.buffer_idx as usize);205let idx = match self.stolen_buffers.entry(buffer.deref().as_ptr() as usize) {206Entry::Occupied(entry) => *entry.get(),207Entry::Vacant(entry) => {208let idx = self.completed_buffers.len() as u32;209entry.insert(idx);210self.completed_buffers.push(buffer.clone());211self.total_buffer_len += buffer.len();212idx213},214};215v.buffer_idx = idx;216self.views.push_unchecked(v);217}218}219220pub fn push_view(&mut self, mut v: View, buffers: &[Buffer<u8>]) {221let len = v.length;222self.total_bytes_len += len as usize;223if len <= View::MAX_INLINE_SIZE {224self.views.push(v);225} else {226// Do no mix use of push_view and push_value_ignore_validity -227// it causes fragmentation.228self.finish_in_progress();229230let buffer = &buffers[v.buffer_idx as usize];231let idx = match self.stolen_buffers.entry(buffer.deref().as_ptr() as usize) {232Entry::Occupied(entry) => {233let idx = *entry.get();234let target_buffer = &self.completed_buffers[idx as usize];235debug_assert_eq!(buffer, target_buffer);236idx237},238Entry::Vacant(entry) => {239let idx = self.completed_buffers.len() as u32;240entry.insert(idx);241self.completed_buffers.push(buffer.clone());242self.total_buffer_len += buffer.len();243idx244},245};246v.buffer_idx = idx;247self.views.push(v);248}249if let Some(validity) = &mut self.validity {250validity.push(true)251}252}253254#[inline]255pub fn push_value_ignore_validity<V: AsRef<T>>(&mut self, value: V) {256let bytes = value.as_ref().to_bytes();257self.total_bytes_len += bytes.len();258let view = self.push_value_into_buffer(bytes);259self.views.push(view);260}261262#[inline]263pub fn push_buffer(&mut self, buffer: Buffer<u8>) -> u32 {264self.finish_in_progress();265266let buffer_idx = self.completed_buffers.len();267self.total_buffer_len += buffer.len();268self.completed_buffers.push(buffer);269buffer_idx as u32270}271272#[inline]273pub fn push_value<V: AsRef<T>>(&mut self, value: V) {274if let Some(validity) = &mut self.validity {275validity.push(true)276}277self.push_value_ignore_validity(value)278}279280#[inline]281pub fn push<V: AsRef<T>>(&mut self, value: Option<V>) {282if let Some(value) = value {283self.push_value(value)284} else {285self.push_null()286}287}288289#[inline]290pub fn push_null(&mut self) {291self.views.push(View::default());292match &mut self.validity {293Some(validity) => validity.push(false),294None => self.init_validity(true),295}296}297298/// Get a [`View`] for a specific set of bytes.299pub fn push_value_into_buffer(&mut self, bytes: &[u8]) -> View {300assert!(bytes.len() <= u32::MAX as usize);301302if bytes.len() <= View::MAX_INLINE_SIZE as usize {303View::new_inline(bytes)304} else {305self.total_buffer_len += bytes.len();306307// We want to make sure that we never have to memcopy between buffers. So if the308// current buffer is not large enough, create a new buffer that is large enough and try309// to anticipate the larger size.310let required_capacity = self.in_progress_buffer.len() + bytes.len();311let does_not_fit_in_buffer = self.in_progress_buffer.capacity() < required_capacity;312313// We can only save offsets that are below u32::MAX314let offset_will_not_fit = self.in_progress_buffer.len() > u32::MAX as usize;315316if does_not_fit_in_buffer || offset_will_not_fit {317// Allocate a new buffer and flush the old buffer318let new_capacity = (self.in_progress_buffer.capacity() * 2)319.clamp(DEFAULT_BLOCK_SIZE, MAX_EXP_BLOCK_SIZE)320.max(bytes.len());321let in_progress = Vec::with_capacity(new_capacity);322let flushed = std::mem::replace(&mut self.in_progress_buffer, in_progress);323if !flushed.is_empty() {324self.completed_buffers.push(flushed.into())325}326}327328let offset = self.in_progress_buffer.len() as u32;329self.in_progress_buffer.extend_from_slice(bytes);330331let buffer_idx = u32::try_from(self.completed_buffers.len()).unwrap();332333View::new_from_bytes(bytes, buffer_idx, offset)334}335}336337pub fn extend_null(&mut self, additional: usize) {338if self.validity.is_none() && additional > 0 {339self.init_validity(false);340}341self.views342.extend(std::iter::repeat_n(View::default(), additional));343if let Some(validity) = &mut self.validity {344validity.extend_constant(additional, false);345}346}347348pub fn extend_constant<V: AsRef<T>>(&mut self, additional: usize, value: Option<V>) {349if value.is_none() && self.validity.is_none() {350self.init_validity(false);351}352353if let Some(validity) = &mut self.validity {354validity.extend_constant(additional, value.is_some())355}356357// Push and pop to get the properly encoded value.358// For long string this leads to a dictionary encoding,359// as we push the string only once in the buffers360if let Some(bytes) = value {361let view = self.push_value_into_buffer(bytes.as_ref().to_bytes());362self.views.extend(std::iter::repeat_n(view, additional));363self.total_bytes_len += view.length as usize * additional;364}365}366367impl_mutable_array_mut_validity!();368369#[inline]370pub fn extend_values<I, P>(&mut self, iterator: I)371where372I: Iterator<Item = P>,373P: AsRef<T>,374{375self.reserve(iterator.size_hint().0);376for v in iterator {377self.push_value(v)378}379}380381#[inline]382pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)383where384I: TrustedLen<Item = P>,385P: AsRef<T>,386{387self.extend_values(iterator)388}389390#[inline]391pub fn extend<I, P>(&mut self, iterator: I)392where393I: Iterator<Item = Option<P>>,394P: AsRef<T>,395{396self.reserve(iterator.size_hint().0);397for p in iterator {398self.push(p)399}400}401402#[inline]403pub fn extend_trusted_len<I, P>(&mut self, iterator: I)404where405I: TrustedLen<Item = Option<P>>,406P: AsRef<T>,407{408self.extend(iterator)409}410411#[inline]412pub fn extend_views<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])413where414I: Iterator<Item = Option<View>>,415{416self.reserve(iterator.size_hint().0);417for p in iterator {418match p {419Some(v) => self.push_view(v, buffers),420None => self.push_null(),421}422}423}424425#[inline]426pub fn extend_views_trusted_len<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])427where428I: TrustedLen<Item = Option<View>>,429{430self.extend_views(iterator, buffers);431}432433#[inline]434pub fn extend_non_null_views<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])435where436I: Iterator<Item = View>,437{438self.reserve(iterator.size_hint().0);439for v in iterator {440self.push_view(v, buffers);441}442}443444#[inline]445pub fn extend_non_null_views_trusted_len<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])446where447I: TrustedLen<Item = View>,448{449self.extend_non_null_views(iterator, buffers);450}451452/// # Safety453/// Same as `push_view_unchecked()`.454#[inline]455pub unsafe fn extend_non_null_views_unchecked<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])456where457I: Iterator<Item = View>,458{459self.reserve(iterator.size_hint().0);460for v in iterator {461self.push_view_unchecked(v, buffers);462}463}464465/// # Safety466/// Same as `push_view_unchecked()`.467#[inline]468pub unsafe fn extend_non_null_views_unchecked_dedupe<I>(469&mut self,470iterator: I,471buffers: &[Buffer<u8>],472) where473I: Iterator<Item = View>,474{475self.reserve(iterator.size_hint().0);476for v in iterator {477self.push_view_unchecked_dedupe(v, buffers);478}479}480481#[inline]482pub fn from_iterator<I, P>(iterator: I) -> Self483where484I: Iterator<Item = Option<P>>,485P: AsRef<T>,486{487let mut mutable = Self::with_capacity(iterator.size_hint().0);488mutable.extend(iterator);489mutable490}491492pub fn from_values_iter<I, P>(iterator: I) -> Self493where494I: Iterator<Item = P>,495P: AsRef<T>,496{497let mut mutable = Self::with_capacity(iterator.size_hint().0);498mutable.extend_values(iterator);499mutable500}501502pub fn from<S: AsRef<T>, P: AsRef<[Option<S>]>>(slice: P) -> Self {503Self::from_iterator(slice.as_ref().iter().map(|opt_v| opt_v.as_ref()))504}505506pub fn finish_in_progress(&mut self) -> bool {507if !self.in_progress_buffer.is_empty() {508self.completed_buffers509.push(std::mem::take(&mut self.in_progress_buffer).into());510true511} else {512false513}514}515516#[inline]517pub fn freeze(self) -> BinaryViewArrayGeneric<T> {518self.into()519}520521#[inline]522pub fn freeze_with_dtype(self, dtype: ArrowDataType) -> BinaryViewArrayGeneric<T> {523let mut arr: BinaryViewArrayGeneric<T> = self.into();524arr.dtype = dtype;525arr526}527528pub fn take(self) -> (Vec<View>, Vec<Buffer<u8>>) {529(self.views, self.completed_buffers)530}531532#[inline]533pub fn value(&self, i: usize) -> &T {534assert!(i < self.len());535unsafe { self.value_unchecked(i) }536}537538/// Returns the element at index `i`539///540/// # Safety541/// Assumes that the `i < self.len`.542#[inline]543pub unsafe fn value_unchecked(&self, i: usize) -> &T {544self.value_from_view_unchecked(self.views.get_unchecked(i))545}546547/// Returns the element indicated by the given view.548///549/// # Safety550/// Assumes the View belongs to this MutableBinaryViewArray.551pub unsafe fn value_from_view_unchecked<'a>(&'a self, view: &'a View) -> &'a T {552// View layout:553// length: 4 bytes554// prefix: 4 bytes555// buffer_index: 4 bytes556// offset: 4 bytes557558// Inlined layout:559// length: 4 bytes560// data: 12 bytes561let len = view.length;562let bytes = if len <= View::MAX_INLINE_SIZE {563let ptr = view as *const View as *const u8;564std::slice::from_raw_parts(ptr.add(4), len as usize)565} else {566let buffer_idx = view.buffer_idx as usize;567let offset = view.offset;568569let data = if buffer_idx == self.completed_buffers.len() {570self.in_progress_buffer.as_slice()571} else {572self.completed_buffers.get_unchecked(buffer_idx)573};574575let offset = offset as usize;576data.get_unchecked(offset..offset + len as usize)577};578T::from_bytes_unchecked(bytes)579}580581/// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity582pub fn values_iter(&self) -> MutableBinaryViewValueIter<'_, T> {583MutableBinaryViewValueIter::new(self)584}585586pub fn extend_from_array(&mut self, other: &BinaryViewArrayGeneric<T>) {587let slf_len = self.len();588match (&mut self.validity, other.validity()) {589(None, None) => {},590(Some(v), None) => v.extend_constant(other.len(), true),591(v @ None, Some(other)) => {592let mut bm = MutableBitmap::with_capacity(slf_len + other.len());593bm.extend_constant(slf_len, true);594bm.extend_from_bitmap(other);595*v = Some(bm);596},597(Some(slf), Some(other)) => slf.extend_from_bitmap(other),598}599600if other.total_buffer_len() == 0 {601self.views.extend(other.views().iter().copied());602} else {603self.finish_in_progress();604605let buffer_offset = self.completed_buffers().len() as u32;606self.completed_buffers607.extend(other.data_buffers().iter().cloned());608609self.views.extend(other.views().iter().map(|view| {610let mut view = *view;611if view.length > View::MAX_INLINE_SIZE {612view.buffer_idx += buffer_offset;613}614view615}));616617let new_total_buffer_len = self.total_buffer_len() + other.total_buffer_len();618self.total_buffer_len = new_total_buffer_len;619}620621self.total_bytes_len = self.total_bytes_len() + other.total_bytes_len();622}623}624625impl MutableBinaryViewArray<[u8]> {626pub fn validate_utf8(&mut self, buffer_offset: usize, views_offset: usize) -> PolarsResult<()> {627// Finish the in progress as it might be required for validation.628let pushed = self.finish_in_progress();629// views are correct630unsafe {631validate_views_utf8_only(632&self.views[views_offset..],633&self.completed_buffers,634buffer_offset,635)?636}637// Restore in-progress buffer as we don't want to get too small buffers638if pushed {639if let Some(last) = self.completed_buffers.pop() {640self.in_progress_buffer = last.into_mut().right().unwrap();641}642}643Ok(())644}645646/// Extend from a `buffer` and `length` of items given some statistics about the lengths.647///648/// This will attempt to dispatch to several optimized implementations.649///650/// # Safety651///652/// This is safe if the statistics are correct.653pub unsafe fn extend_from_lengths_with_stats(654&mut self,655buffer: &[u8],656lengths_iterator: impl Clone + ExactSizeIterator<Item = usize>,657min_length: usize,658max_length: usize,659sum_length: usize,660) {661let num_items = lengths_iterator.len();662663if num_items == 0 {664return;665}666667#[cfg(debug_assertions)]668{669let (min, max, sum) = lengths_iterator.clone().map(|v| (v, v, v)).fold(670(usize::MAX, usize::MIN, 0usize),671|(cmin, cmax, csum), (emin, emax, esum)| {672(cmin.min(emin), cmax.max(emax), csum + esum)673},674);675676assert_eq!(min, min_length);677assert_eq!(max, max_length);678assert_eq!(sum, sum_length);679}680681assert!(sum_length <= buffer.len());682683let mut buffer_offset = 0;684if min_length > View::MAX_INLINE_SIZE as usize685&& (num_items == 1 || sum_length + self.in_progress_buffer.len() <= u32::MAX as usize)686{687let buffer_idx = self.completed_buffers().len() as u32;688let in_progress_buffer_offset = self.in_progress_buffer.len();689690self.total_bytes_len += sum_length;691self.total_buffer_len += sum_length;692693self.in_progress_buffer694.extend_from_slice(&buffer[..sum_length]);695self.views.extend(lengths_iterator.map(|length| {696// SAFETY: We asserted before that the sum of all lengths is smaller or equal to697// the buffer length.698let view_buffer =699unsafe { buffer.get_unchecked(buffer_offset..buffer_offset + length) };700701// SAFETY: We know that the minimum length > View::MAX_INLINE_SIZE. Therefore, this702// length is > View::MAX_INLINE_SIZE.703let view = unsafe {704View::new_noninline_unchecked(705view_buffer,706buffer_idx,707(buffer_offset + in_progress_buffer_offset) as u32,708)709};710buffer_offset += length;711view712}));713} else if max_length <= View::MAX_INLINE_SIZE as usize {714self.total_bytes_len += sum_length;715716// If the min and max are the same, we can dispatch to the optimized SIMD717// implementation.718if min_length == max_length {719let length = min_length;720if length == 0 {721self.views722.resize(self.views.len() + num_items, View::new_inline(&[]));723} else {724View::extend_with_inlinable_strided(725&mut self.views,726&buffer[..length * num_items],727length as u8,728);729}730} else {731self.views.extend(lengths_iterator.map(|length| {732// SAFETY: We asserted before that the sum of all lengths is smaller or equal733// to the buffer length.734let view_buffer =735unsafe { buffer.get_unchecked(buffer_offset..buffer_offset + length) };736737// SAFETY: We know that each view has a length <= View::MAX_INLINE_SIZE because738// the maximum length is <= View::MAX_INLINE_SIZE739let view = unsafe { View::new_inline_unchecked(view_buffer) };740741buffer_offset += length;742743view744}));745}746} else {747// If all fails, just fall back to a base implementation.748self.reserve(num_items);749for length in lengths_iterator {750let value = &buffer[buffer_offset..buffer_offset + length];751buffer_offset += length;752self.push_value(value);753}754}755}756757/// Extend from a `buffer` and `length` of items.758///759/// This will attempt to dispatch to several optimized implementations.760#[inline]761pub fn extend_from_lengths(762&mut self,763buffer: &[u8],764lengths_iterator: impl Clone + ExactSizeIterator<Item = usize>,765) {766let (min, max, sum) = lengths_iterator.clone().map(|v| (v, v, v)).fold(767(usize::MAX, usize::MIN, 0usize),768|(cmin, cmax, csum), (emin, emax, esum)| (cmin.min(emin), cmax.max(emax), csum + esum),769);770771// SAFETY: We just collected the right stats.772unsafe { self.extend_from_lengths_with_stats(buffer, lengths_iterator, min, max, sum) }773}774}775776impl<T: ViewType + ?Sized, P: AsRef<T>> Extend<Option<P>> for MutableBinaryViewArray<T> {777#[inline]778fn extend<I: IntoIterator<Item = Option<P>>>(&mut self, iter: I) {779Self::extend(self, iter.into_iter())780}781}782783impl<T: ViewType + ?Sized, P: AsRef<T>> FromIterator<Option<P>> for MutableBinaryViewArray<T> {784#[inline]785fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {786Self::from_iterator(iter.into_iter())787}788}789790impl<T: ViewType + ?Sized> MutableArray for MutableBinaryViewArray<T> {791fn dtype(&self) -> &ArrowDataType {792T::dtype()793}794795fn len(&self) -> usize {796MutableBinaryViewArray::len(self)797}798799fn validity(&self) -> Option<&MutableBitmap> {800self.validity.as_ref()801}802803fn as_box(&mut self) -> Box<dyn Array> {804let mutable = std::mem::take(self);805let arr: BinaryViewArrayGeneric<T> = mutable.into();806arr.boxed()807}808809fn as_any(&self) -> &dyn Any {810self811}812813fn as_mut_any(&mut self) -> &mut dyn Any {814self815}816817fn push_null(&mut self) {818MutableBinaryViewArray::push_null(self)819}820821fn reserve(&mut self, additional: usize) {822MutableBinaryViewArray::reserve(self, additional)823}824825fn shrink_to_fit(&mut self) {826self.views.shrink_to_fit()827}828}829830impl<T: ViewType + ?Sized, P: AsRef<T>> TryExtend<Option<P>> for MutableBinaryViewArray<T> {831/// This is infallible and is implemented for consistency with all other types832#[inline]833fn try_extend<I: IntoIterator<Item = Option<P>>>(&mut self, iter: I) -> PolarsResult<()> {834self.extend(iter.into_iter());835Ok(())836}837}838839impl<T: ViewType + ?Sized, P: AsRef<T>> TryPush<Option<P>> for MutableBinaryViewArray<T> {840/// This is infallible and is implemented for consistency with all other types841#[inline(always)]842fn try_push(&mut self, item: Option<P>) -> PolarsResult<()> {843self.push(item.as_ref().map(|p| p.as_ref()));844Ok(())845}846}847848#[cfg(test)]849mod tests {850use super::*;851852fn roundtrip(values: &[&[u8]]) -> bool {853let buffer = values854.iter()855.flat_map(|v| v.iter().copied())856.collect::<Vec<u8>>();857let lengths = values.iter().map(|v| v.len()).collect::<Vec<usize>>();858let mut bv = MutableBinaryViewArray::<[u8]>::with_capacity(values.len());859860bv.extend_from_lengths(&buffer[..], lengths.into_iter());861862&bv.values_iter().collect::<Vec<&[u8]>>()[..] == values863}864865#[test]866fn extend_with_lengths_basic() {867assert!(roundtrip(&[]));868assert!(roundtrip(&[b"abc"]));869assert!(roundtrip(&[870b"a_very_very_long_string_that_is_not_inlinable"871]));872assert!(roundtrip(&[873b"abc",874b"a_very_very_long_string_that_is_not_inlinable"875]));876}877878#[test]879fn extend_with_inlinable_fastpath() {880assert!(roundtrip(&[b"abc", b"defg", b"hix"]));881assert!(roundtrip(&[b"abc", b"defg", b"hix", b"xyza1234abcd"]));882}883884#[test]885fn extend_with_inlinable_eq_len_fastpath() {886assert!(roundtrip(&[b"abc", b"def", b"hix"]));887assert!(roundtrip(&[b"abc", b"def", b"hix", b"xyz"]));888}889890#[test]891fn extend_with_not_inlinable_fastpath() {892assert!(roundtrip(&[893b"a_very_long_string123",894b"a_longer_string_than_the_previous"895]));896}897}898899900