// Path: blob/main/crates/polars-arrow/src/array/primitive/mutable.rs
// 6939 views
use std::sync::Arc;12use polars_error::PolarsResult;34use super::{PrimitiveArray, check};5use crate::array::physical_binary::extend_validity;6use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};7use crate::bitmap::{Bitmap, MutableBitmap};8use crate::datatypes::ArrowDataType;9use crate::trusted_len::TrustedLen;10use crate::types::NativeType;1112/// The Arrow's equivalent to `Vec<Option<T>>` where `T` is byte-size (e.g. `i32`).13/// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`.14#[derive(Debug, Clone)]15pub struct MutablePrimitiveArray<T: NativeType> {16dtype: ArrowDataType,17values: Vec<T>,18validity: Option<MutableBitmap>,19}2021impl<T: NativeType> From<MutablePrimitiveArray<T>> for PrimitiveArray<T> {22fn from(other: MutablePrimitiveArray<T>) -> Self {23let validity = other.validity.and_then(|x| {24let bitmap: Bitmap = x.into();25if bitmap.unset_bits() == 0 {26None27} else {28Some(bitmap)29}30});3132PrimitiveArray::<T>::new(other.dtype, other.values.into(), validity)33}34}3536impl<T: NativeType, P: AsRef<[Option<T>]>> From<P> for MutablePrimitiveArray<T> {37fn from(slice: P) -> Self {38Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))39}40}4142impl<T: NativeType> MutablePrimitiveArray<T> {43/// Creates a new empty [`MutablePrimitiveArray`].44pub fn new() -> Self {45Self::with_capacity(0)46}4748/// Creates a new [`MutablePrimitiveArray`] with a capacity.49pub fn with_capacity(capacity: usize) -> Self {50Self::with_capacity_from(capacity, T::PRIMITIVE.into())51}5253/// The canonical method to create a [`MutablePrimitiveArray`] out of its internal components.54/// # Implementation55/// This function is `O(1)`.56///57/// # Errors58/// This function errors iff:59/// * The validity is not `None` and its length is different from `values`'s length60/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Primitive(T::PRIMITIVE)`]61pub fn 
try_new(62dtype: ArrowDataType,63values: Vec<T>,64validity: Option<MutableBitmap>,65) -> PolarsResult<Self> {66check(&dtype, &values, validity.as_ref().map(|x| x.len()))?;67Ok(Self {68dtype,69values,70validity,71})72}7374/// Extract the low-end APIs from the [`MutablePrimitiveArray`].75pub fn into_inner(self) -> (ArrowDataType, Vec<T>, Option<MutableBitmap>) {76(self.dtype, self.values, self.validity)77}7879/// Applies a function `f` to the values of this array, cloning the values80/// iff they are being shared with others81///82/// This is an API to use clone-on-write83/// # Implementation84/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`85/// if it is being shared (since it results in a `O(N)` memcopy).86/// # Panics87/// This function panics iff `f` panics88pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {89f(&mut self.values);90}91}9293impl<T: NativeType> Default for MutablePrimitiveArray<T> {94fn default() -> Self {95Self::new()96}97}9899impl<T: NativeType> From<ArrowDataType> for MutablePrimitiveArray<T> {100fn from(dtype: ArrowDataType) -> Self {101assert!(dtype.to_physical_type().eq_primitive(T::PRIMITIVE));102Self {103dtype,104values: Vec::<T>::new(),105validity: None,106}107}108}109110impl<T: NativeType> MutablePrimitiveArray<T> {111/// Creates a new [`MutablePrimitiveArray`] from a capacity and [`ArrowDataType`].112pub fn with_capacity_from(capacity: usize, dtype: ArrowDataType) -> Self {113assert!(dtype.to_physical_type().eq_primitive(T::PRIMITIVE));114Self {115dtype,116values: Vec::<T>::with_capacity(capacity),117validity: None,118}119}120121/// Reserves `additional` entries.122pub fn reserve(&mut self, additional: usize) {123self.values.reserve(additional);124if let Some(x) = self.validity.as_mut() {125x.reserve(additional)126}127}128129#[inline]130pub fn push_value(&mut self, value: T) {131self.values.push(value);132if let Some(validity) = &mut self.validity {133validity.push(true)134}135}136137/// Adds a new value 
to the array.138#[inline]139pub fn push(&mut self, value: Option<T>) {140match value {141Some(value) => self.push_value(value),142None => {143self.values.push(T::default());144match &mut self.validity {145Some(validity) => validity.push(false),146None => {147self.init_validity();148},149}150},151}152}153154/// Pop a value from the array.155/// Note if the values is empty, this method will return None.156pub fn pop(&mut self) -> Option<T> {157let value = self.values.pop()?;158self.validity159.as_mut()160.map(|x| x.pop()?.then(|| value))161.unwrap_or_else(|| Some(value))162}163164/// Extends the [`MutablePrimitiveArray`] with a constant165#[inline]166pub fn extend_constant(&mut self, additional: usize, value: Option<T>) {167if let Some(value) = value {168self.values.resize(self.values.len() + additional, value);169if let Some(validity) = &mut self.validity {170validity.extend_constant(additional, true)171}172} else {173if let Some(validity) = &mut self.validity {174validity.extend_constant(additional, false)175} else {176let mut validity = MutableBitmap::with_capacity(self.values.capacity());177validity.extend_constant(self.len(), true);178validity.extend_constant(additional, false);179self.validity = Some(validity)180}181self.values182.resize(self.values.len() + additional, T::default());183}184}185186/// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.187#[inline]188pub fn extend_trusted_len<P, I>(&mut self, iterator: I)189where190P: std::borrow::Borrow<T>,191I: TrustedLen<Item = Option<P>>,192{193unsafe { self.extend_trusted_len_unchecked(iterator) }194}195196/// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.197///198/// # Safety199/// The iterator must be trusted len.200#[inline]201pub unsafe fn extend_trusted_len_unchecked<P, I>(&mut self, iterator: I)202where203P: std::borrow::Borrow<T>,204I: Iterator<Item = Option<P>>,205{206if let Some(validity) = self.validity.as_mut() {207extend_trusted_len_unzip(iterator, 
validity, &mut self.values)208} else {209let mut validity = MutableBitmap::new();210validity.extend_constant(self.len(), true);211extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);212self.validity = Some(validity);213}214}215/// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.216/// This differs from `extend_trusted_len` which accepts in iterator of optional values.217#[inline]218pub fn extend_trusted_len_values<I>(&mut self, iterator: I)219where220I: TrustedLen<Item = T>,221{222unsafe { self.extend_values(iterator) }223}224225/// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.226/// This differs from `extend_trusted_len_unchecked` which accepts in iterator of optional values.227///228/// # Safety229/// The iterator must be trusted len.230#[inline]231pub fn extend_values<I>(&mut self, iterator: I)232where233I: Iterator<Item = T>,234{235self.values.extend(iterator);236self.update_all_valid();237}238239#[inline]240/// Extends the [`MutablePrimitiveArray`] from a slice241pub fn extend_from_slice(&mut self, items: &[T]) {242self.values.extend_from_slice(items);243self.update_all_valid();244}245246fn update_all_valid(&mut self) {247// get len before mutable borrow248let len = self.len();249if let Some(validity) = self.validity.as_mut() {250validity.extend_constant(len - validity.len(), true);251}252}253254fn init_validity(&mut self) {255let mut validity = MutableBitmap::with_capacity(self.values.capacity());256validity.extend_constant(self.len(), true);257validity.set(self.len() - 1, false);258self.validity = Some(validity)259}260261/// Changes the arrays' [`ArrowDataType`], returning a new [`MutablePrimitiveArray`].262/// Use to change the logical type without changing the corresponding physical Type.263/// # Implementation264/// This operation is `O(1)`.265#[inline]266pub fn to(self, dtype: ArrowDataType) -> Self {267Self::try_new(dtype, self.values, self.validity).unwrap()268}269270/// 
Converts itself into an [`Array`].271pub fn into_arc(self) -> Arc<dyn Array> {272let a: PrimitiveArray<T> = self.into();273Arc::new(a)274}275276/// Shrinks the capacity of the [`MutablePrimitiveArray`] to fit its current length.277pub fn shrink_to_fit(&mut self) {278self.values.shrink_to_fit();279if let Some(validity) = &mut self.validity {280validity.shrink_to_fit()281}282}283284/// Returns the capacity of this [`MutablePrimitiveArray`].285pub fn capacity(&self) -> usize {286self.values.capacity()287}288289pub fn freeze(self) -> PrimitiveArray<T> {290self.into()291}292293/// Clears the array, removing all values.294///295/// Note that this method has no effect on the allocated capacity296/// of the array.297pub fn clear(&mut self) {298self.values.clear();299self.validity = None;300}301302/// Apply a function that temporarily freezes this `MutableArray` into a `PrimitiveArray`.303pub fn with_freeze<K, F: FnOnce(&PrimitiveArray<T>) -> K>(&mut self, f: F) -> K {304let mutable = std::mem::take(self);305let arr = mutable.freeze();306let out = f(&arr);307*self = arr.into_mut().right().unwrap();308out309}310}311312/// Accessors313impl<T: NativeType> MutablePrimitiveArray<T> {314/// Returns its values.315pub fn values(&self) -> &Vec<T> {316&self.values317}318319/// Returns a mutable slice of values.320pub fn values_mut_slice(&mut self) -> &mut [T] {321self.values.as_mut_slice()322}323}324325/// Setters326impl<T: NativeType> MutablePrimitiveArray<T> {327/// Sets position `index` to `value`.328/// Note that if it is the first time a null appears in this array,329/// this initializes the validity bitmap (`O(N)`).330/// # Panic331/// Panics iff `index >= self.len()`.332pub fn set(&mut self, index: usize, value: Option<T>) {333assert!(index < self.len());334// SAFETY:335// we just checked bounds336unsafe { self.set_unchecked(index, value) }337}338339/// Sets position `index` to `value`.340/// Note that if it is the first time a null appears in this array,341/// this 
initializes the validity bitmap (`O(N)`).342///343/// # Safety344/// Caller must ensure `index < self.len()`345pub unsafe fn set_unchecked(&mut self, index: usize, value: Option<T>) {346*self.values.get_unchecked_mut(index) = value.unwrap_or_default();347348if value.is_none() && self.validity.is_none() {349// When the validity is None, all elements so far are valid. When one of the elements is set of null,350// the validity must be initialized.351let mut validity = MutableBitmap::new();352validity.extend_constant(self.len(), true);353self.validity = Some(validity);354}355if let Some(x) = self.validity.as_mut() {356x.set_unchecked(index, value.is_some())357}358}359360/// Sets the validity.361/// # Panic362/// Panics iff the validity's len is not equal to the existing values' length.363pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {364if let Some(validity) = &validity {365assert_eq!(self.values.len(), validity.len())366}367self.validity = validity;368}369370/// Sets values.371/// # Panic372/// Panics iff the values' length is not equal to the existing values' len.373pub fn set_values(&mut self, values: Vec<T>) {374assert_eq!(values.len(), self.values.len());375self.values = values;376}377}378379impl<T: NativeType> Extend<Option<T>> for MutablePrimitiveArray<T> {380fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {381let iter = iter.into_iter();382self.reserve(iter.size_hint().0);383iter.for_each(|x| self.push(x))384}385}386387impl<T: NativeType> TryExtend<Option<T>> for MutablePrimitiveArray<T> {388/// This is infallible and is implemented for consistency with all other types389fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {390self.extend(iter);391Ok(())392}393}394395impl<T: NativeType> TryPush<Option<T>> for MutablePrimitiveArray<T> {396/// This is infalible and is implemented for consistency with all other types397#[inline]398fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> 
{399self.push(item);400Ok(())401}402}403404impl<T: NativeType> MutableArray for MutablePrimitiveArray<T> {405fn len(&self) -> usize {406self.values.len()407}408409fn validity(&self) -> Option<&MutableBitmap> {410self.validity.as_ref()411}412413fn as_box(&mut self) -> Box<dyn Array> {414PrimitiveArray::new(415self.dtype.clone(),416std::mem::take(&mut self.values).into(),417std::mem::take(&mut self.validity).map(|x| x.into()),418)419.boxed()420}421422fn as_arc(&mut self) -> Arc<dyn Array> {423PrimitiveArray::new(424self.dtype.clone(),425std::mem::take(&mut self.values).into(),426std::mem::take(&mut self.validity).map(|x| x.into()),427)428.arced()429}430431fn dtype(&self) -> &ArrowDataType {432&self.dtype433}434435fn as_any(&self) -> &dyn std::any::Any {436self437}438439fn as_mut_any(&mut self) -> &mut dyn std::any::Any {440self441}442443fn push_null(&mut self) {444self.push(None)445}446447fn reserve(&mut self, additional: usize) {448self.reserve(additional)449}450451fn shrink_to_fit(&mut self) {452self.shrink_to_fit()453}454}455456impl<T: NativeType> MutablePrimitiveArray<T> {457/// Creates a [`MutablePrimitiveArray`] from a slice of values.458pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {459Self::from_trusted_len_values_iter(slice.as_ref().iter().copied())460}461462/// Creates a [`MutablePrimitiveArray`] from an iterator of trusted length.463///464/// # Safety465/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).466/// I.e. 
`size_hint().1` correctly reports its length.467#[inline]468pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self469where470P: std::borrow::Borrow<T>,471I: Iterator<Item = Option<P>>,472{473let (validity, values) = trusted_len_unzip(iterator);474475Self {476dtype: T::PRIMITIVE.into(),477values,478validity,479}480}481482/// Creates a [`MutablePrimitiveArray`] from a [`TrustedLen`].483#[inline]484pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self485where486P: std::borrow::Borrow<T>,487I: TrustedLen<Item = Option<P>>,488{489unsafe { Self::from_trusted_len_iter_unchecked(iterator) }490}491492/// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.493///494/// # Safety495/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).496/// I.e. that `size_hint().1` correctly reports its length.497#[inline]498pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(499iter: I,500) -> std::result::Result<Self, E>501where502P: std::borrow::Borrow<T>,503I: IntoIterator<Item = std::result::Result<Option<P>, E>>,504{505let iterator = iter.into_iter();506507let (validity, values) = try_trusted_len_unzip(iterator)?;508509Ok(Self {510dtype: T::PRIMITIVE.into(),511values,512validity,513})514}515516/// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.517#[inline]518pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>519where520P: std::borrow::Borrow<T>,521I: TrustedLen<Item = std::result::Result<Option<P>, E>>,522{523unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }524}525526/// Creates a new [`MutablePrimitiveArray`] out an iterator over values527pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {528Self {529dtype: T::PRIMITIVE.into(),530values: iter.collect(),531validity: None,532}533}534535/// Creates a (non-null) [`MutablePrimitiveArray`] from a vector of values.536/// This does not 
have memcopy and is the fastest way to create a [`PrimitiveArray`].537pub fn from_vec(values: Vec<T>) -> Self {538Self::try_new(T::PRIMITIVE.into(), values, None).unwrap()539}540541/// Creates a new [`MutablePrimitiveArray`] from an iterator over values542///543/// # Safety544/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).545/// I.e. that `size_hint().1` correctly reports its length.546pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {547Self {548dtype: T::PRIMITIVE.into(),549values: iter.collect(),550validity: None,551}552}553}554555impl<T: NativeType, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr>556for MutablePrimitiveArray<T>557{558fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {559let iter = iter.into_iter();560let (lower, _) = iter.size_hint();561562let mut validity = MutableBitmap::with_capacity(lower);563564let values: Vec<T> = iter565.map(|item| {566if let Some(a) = item.borrow() {567validity.push(true);568*a569} else {570validity.push(false);571T::default()572}573})574.collect();575576let validity = Some(validity);577578Self {579dtype: T::PRIMITIVE.into(),580values,581validity,582}583}584}585586/// Extends a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.587/// The first buffer corresponds to a bitmap buffer, the second one588/// corresponds to a values buffer.589/// # Safety590/// The caller must ensure that `iterator` is `TrustedLen`.591#[inline]592pub(crate) unsafe fn extend_trusted_len_unzip<I, P, T>(593iterator: I,594validity: &mut MutableBitmap,595buffer: &mut Vec<T>,596) where597T: NativeType,598P: std::borrow::Borrow<T>,599I: Iterator<Item = Option<P>>,600{601let (_, upper) = iterator.size_hint();602let additional = upper.expect("trusted_len_unzip requires an upper limit");603604validity.reserve(additional);605let values = iterator.map(|item| {606if let Some(item) = item 
{607validity.push_unchecked(true);608*item.borrow()609} else {610validity.push_unchecked(false);611T::default()612}613});614buffer.extend(values);615}616617/// Creates a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.618/// The first buffer corresponds to a bitmap buffer, the second one619/// corresponds to a values buffer.620/// # Safety621/// The caller must ensure that `iterator` is `TrustedLen`.622#[inline]623pub(crate) unsafe fn trusted_len_unzip<I, P, T>(iterator: I) -> (Option<MutableBitmap>, Vec<T>)624where625T: NativeType,626P: std::borrow::Borrow<T>,627I: Iterator<Item = Option<P>>,628{629let mut validity = MutableBitmap::new();630let mut buffer = Vec::<T>::new();631632extend_trusted_len_unzip(iterator, &mut validity, &mut buffer);633634let validity = Some(validity);635636(validity, buffer)637}638639/// # Safety640/// The caller must ensure that `iterator` is `TrustedLen`.641#[inline]642pub(crate) unsafe fn try_trusted_len_unzip<E, I, P, T>(643iterator: I,644) -> std::result::Result<(Option<MutableBitmap>, Vec<T>), E>645where646T: NativeType,647P: std::borrow::Borrow<T>,648I: Iterator<Item = std::result::Result<Option<P>, E>>,649{650let (_, upper) = iterator.size_hint();651let len = upper.expect("trusted_len_unzip requires an upper limit");652653let mut null = MutableBitmap::with_capacity(len);654let mut buffer = Vec::<T>::with_capacity(len);655656let mut dst = buffer.as_mut_ptr();657for item in iterator {658let item = if let Some(item) = item? 
{659null.push(true);660*item.borrow()661} else {662null.push(false);663T::default()664};665std::ptr::write(dst, item);666dst = dst.add(1);667}668assert_eq!(669dst.offset_from(buffer.as_ptr()) as usize,670len,671"Trusted iterator length was not accurately reported"672);673buffer.set_len(len);674null.set_len(len);675676let validity = Some(null);677678Ok((validity, buffer))679}680681impl<T: NativeType> PartialEq for MutablePrimitiveArray<T> {682fn eq(&self, other: &Self) -> bool {683self.iter().eq(other.iter())684}685}686687impl<T: NativeType> TryExtendFromSelf for MutablePrimitiveArray<T> {688fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {689extend_validity(self.len(), &mut self.validity, &other.validity);690691let slice = other.values.as_slice();692self.values.extend_from_slice(slice);693Ok(())694}695}696697698