Path: blob/main/crates/polars-arrow/src/array/boolean/mutable.rs
6939 views
use std::sync::Arc;12use polars_error::{PolarsResult, polars_bail};34use super::BooleanArray;5use crate::array::physical_binary::extend_validity;6use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};7use crate::bitmap::MutableBitmap;8use crate::datatypes::{ArrowDataType, PhysicalType};9use crate::trusted_len::TrustedLen;1011/// The Arrow's equivalent to `Vec<Option<bool>>`, but with `1/16` of its size.12/// Converting a [`MutableBooleanArray`] into a [`BooleanArray`] is `O(1)`.13/// # Implementation14/// This struct does not allocate a validity until one is required (i.e. push a null to it).15#[derive(Debug, Clone)]16pub struct MutableBooleanArray {17dtype: ArrowDataType,18values: MutableBitmap,19validity: Option<MutableBitmap>,20}2122impl From<MutableBooleanArray> for BooleanArray {23fn from(other: MutableBooleanArray) -> Self {24BooleanArray::new(25other.dtype,26other.values.into(),27other.validity.map(|x| x.into()),28)29}30}3132impl<P: AsRef<[Option<bool>]>> From<P> for MutableBooleanArray {33/// Creates a new [`MutableBooleanArray`] out of a slice of Optional `bool`.34fn from(slice: P) -> Self {35Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))36}37}3839impl Default for MutableBooleanArray {40fn default() -> Self {41Self::new()42}43}4445impl MutableBooleanArray {46/// Creates an new empty [`MutableBooleanArray`].47pub fn new() -> Self {48Self::with_capacity(0)49}5051/// The canonical method to create a [`MutableBooleanArray`] out of low-end APIs.52/// # Errors53/// This function errors iff:54/// * The validity is not `None` and its length is different from `values`'s length55/// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`].56pub fn try_new(57dtype: ArrowDataType,58values: MutableBitmap,59validity: Option<MutableBitmap>,60) -> PolarsResult<Self> {61if validity62.as_ref()63.is_some_and(|validity| validity.len() != values.len())64{65polars_bail!(ComputeError:66"validity mask length must match the number of values",67)68}6970if dtype.to_physical_type() != PhysicalType::Boolean {71polars_bail!(72oos = "MutableBooleanArray can only be initialized with a DataType whose physical type is Boolean",73)74}7576Ok(Self {77dtype,78values,79validity,80})81}8283/// Creates an new [`MutableBooleanArray`] with a capacity of values.84pub fn with_capacity(capacity: usize) -> Self {85Self {86dtype: ArrowDataType::Boolean,87values: MutableBitmap::with_capacity(capacity),88validity: None,89}90}9192/// Reserves `additional` slots.93pub fn reserve(&mut self, additional: usize) {94self.values.reserve(additional);95if let Some(x) = self.validity.as_mut() {96x.reserve(additional)97}98}99100#[inline]101pub fn push_value(&mut self, value: bool) {102self.values.push(value);103if let Some(validity) = &mut self.validity {104validity.push(true)105}106}107108#[inline]109pub fn push_null(&mut self) {110self.values.push(false);111match &mut self.validity {112Some(validity) => validity.push(false),113None => self.init_validity(),114}115}116117/// Pushes a new entry to [`MutableBooleanArray`].118#[inline]119pub fn push(&mut self, value: Option<bool>) {120match value {121Some(value) => self.push_value(value),122None => self.push_null(),123}124}125126/// Pop an entry from [`MutableBooleanArray`].127/// Note If the values is empty, this method will return None.128pub fn pop(&mut self) -> Option<bool> {129let value = self.values.pop()?;130self.validity131.as_mut()132.map(|x| x.pop()?.then(|| value))133.unwrap_or_else(|| Some(value))134}135136/// Extends the [`MutableBooleanArray`] from an iterator of values of trusted len.137/// This differs from `extend_trusted_len` which accepts in iterator of optional values.138#[inline]139pub fn extend_trusted_len_values<I>(&mut self, iterator: I)140where141I: TrustedLen<Item = bool>,142{143// SAFETY: `I` is `TrustedLen`144unsafe { self.extend_trusted_len_values_unchecked(iterator) }145}146147/// Extends the [`MutableBooleanArray`] from an iterator of values of trusted len.148/// This differs from `extend_trusted_len_unchecked`, which accepts in iterator of optional values.149///150/// # Safety151/// The iterator must be trusted len.152#[inline]153pub unsafe fn extend_trusted_len_values_unchecked<I>(&mut self, iterator: I)154where155I: Iterator<Item = bool>,156{157let (_, upper) = iterator.size_hint();158let additional =159upper.expect("extend_trusted_len_values_unchecked requires an upper limit");160161if let Some(validity) = self.validity.as_mut() {162validity.extend_constant(additional, true);163}164165self.values.extend_from_trusted_len_iter_unchecked(iterator)166}167168/// Extends the [`MutableBooleanArray`] from an iterator of trusted len.169#[inline]170pub fn extend_trusted_len<I, P>(&mut self, iterator: I)171where172P: std::borrow::Borrow<bool>,173I: TrustedLen<Item = Option<P>>,174{175// SAFETY: `I` is `TrustedLen`176unsafe { self.extend_trusted_len_unchecked(iterator) }177}178179/// Extends the [`MutableBooleanArray`] from an iterator of trusted len.180///181/// # Safety182/// The iterator must be trusted len.183#[inline]184pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)185where186P: std::borrow::Borrow<bool>,187I: Iterator<Item = Option<P>>,188{189if let Some(validity) = self.validity.as_mut() {190extend_trusted_len_unzip(iterator, validity, &mut self.values);191} else {192let mut validity = MutableBitmap::new();193validity.extend_constant(self.len(), true);194195extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);196197if validity.unset_bits() > 0 {198self.validity = Some(validity);199}200}201}202203/// Extends `MutableBooleanArray` by additional values of constant value.204#[inline]205pub fn extend_constant(&mut self, additional: usize, value: Option<bool>) {206match value {207Some(value) => {208self.values.extend_constant(additional, value);209if let Some(validity) = self.validity.as_mut() {210validity.extend_constant(additional, true);211}212},213None => {214self.values.extend_constant(additional, false);215if let Some(validity) = self.validity.as_mut() {216validity.extend_constant(additional, false)217} else {218self.init_validity();219self.validity220.as_mut()221.unwrap()222.extend_constant(additional, false)223};224},225};226}227228fn init_validity(&mut self) {229let mut validity = MutableBitmap::with_capacity(self.values.capacity());230validity.extend_constant(self.len(), true);231validity.set(self.len() - 1, false);232self.validity = Some(validity)233}234235/// Converts itself into an [`Array`].236pub fn into_arc(self) -> Arc<dyn Array> {237let a: BooleanArray = self.into();238Arc::new(a)239}240241pub fn freeze(self) -> BooleanArray {242self.into()243}244}245246/// Getters247impl MutableBooleanArray {248/// Returns its values.249pub fn values(&self) -> &MutableBitmap {250&self.values251}252}253254/// Setters255impl MutableBooleanArray {256/// Sets position `index` to `value`.257/// Note that if it is the first time a null appears in this array,258/// this initializes the validity bitmap (`O(N)`).259/// # Panic260/// Panics iff index is larger than `self.len()`.261pub fn set(&mut self, index: usize, value: Option<bool>) {262self.values.set(index, value.unwrap_or_default());263264if value.is_none() && self.validity.is_none() {265// When the validity is None, all elements so far are valid. When one of the elements is set of null,266// the validity must be initialized.267self.validity = Some(MutableBitmap::from_trusted_len_iter(std::iter::repeat_n(268true,269self.len(),270)));271}272if let Some(x) = self.validity.as_mut() {273x.set(index, value.is_some())274}275}276}277278/// From implementations279impl MutableBooleanArray {280/// Creates a new [`MutableBooleanArray`] from an [`TrustedLen`] of `bool`.281#[inline]282pub fn from_trusted_len_values_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {283Self::try_new(284ArrowDataType::Boolean,285MutableBitmap::from_trusted_len_iter(iterator),286None,287)288.unwrap()289}290291/// Creates a new [`MutableBooleanArray`] from an [`TrustedLen`] of `bool`.292/// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len293/// but this crate does not mark it as such.294///295/// # Safety296/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).297/// I.e. that `size_hint().1` correctly reports its length.298#[inline]299pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = bool>>(300iterator: I,301) -> Self {302let mut mutable = MutableBitmap::new();303mutable.extend_from_trusted_len_iter_unchecked(iterator);304MutableBooleanArray::try_new(ArrowDataType::Boolean, mutable, None).unwrap()305}306307/// Creates a new [`MutableBooleanArray`] from a slice of `bool`.308#[inline]309pub fn from_slice<P: AsRef<[bool]>>(slice: P) -> Self {310Self::from_trusted_len_values_iter(slice.as_ref().iter().copied())311}312313/// Creates a [`BooleanArray`] from an iterator of trusted length.314/// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len315/// but this crate does not mark it as such.316///317/// # Safety318/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).319/// I.e. that `size_hint().1` correctly reports its length.320#[inline]321pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self322where323P: std::borrow::Borrow<bool>,324I: Iterator<Item = Option<P>>,325{326let (validity, values) = trusted_len_unzip(iterator);327328Self::try_new(ArrowDataType::Boolean, values, validity).unwrap()329}330331/// Creates a [`BooleanArray`] from a [`TrustedLen`].332#[inline]333pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self334where335P: std::borrow::Borrow<bool>,336I: TrustedLen<Item = Option<P>>,337{338// SAFETY: `I` is `TrustedLen`339unsafe { Self::from_trusted_len_iter_unchecked(iterator) }340}341342/// Creates a [`BooleanArray`] from an falible iterator of trusted length.343///344/// # Safety345/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).346/// I.e. that `size_hint().1` correctly reports its length.347#[inline]348pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(349iterator: I,350) -> std::result::Result<Self, E>351where352P: std::borrow::Borrow<bool>,353I: Iterator<Item = std::result::Result<Option<P>, E>>,354{355let (validity, values) = try_trusted_len_unzip(iterator)?;356357let validity = if validity.unset_bits() > 0 {358Some(validity)359} else {360None361};362363Ok(Self::try_new(ArrowDataType::Boolean, values, validity).unwrap())364}365366/// Creates a [`BooleanArray`] from a [`TrustedLen`].367#[inline]368pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>369where370P: std::borrow::Borrow<bool>,371I: TrustedLen<Item = std::result::Result<Option<P>, E>>,372{373// SAFETY: `I` is `TrustedLen`374unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }375}376377/// Shrinks the capacity of the [`MutableBooleanArray`] to fit its current length.378pub fn shrink_to_fit(&mut self) {379self.values.shrink_to_fit();380if let Some(validity) = &mut self.validity {381validity.shrink_to_fit()382}383}384}385386/// Creates a Bitmap and an optional [`MutableBitmap`] from an iterator of `Option<bool>`.387/// The first buffer corresponds to a bitmap buffer, the second one388/// corresponds to a values buffer.389/// # Safety390/// The caller must ensure that `iterator` is `TrustedLen`.391#[inline]392pub(crate) unsafe fn trusted_len_unzip<I, P>(iterator: I) -> (Option<MutableBitmap>, MutableBitmap)393where394P: std::borrow::Borrow<bool>,395I: Iterator<Item = Option<P>>,396{397let mut validity = MutableBitmap::new();398let mut values = MutableBitmap::new();399400extend_trusted_len_unzip(iterator, &mut validity, &mut values);401402let validity = if validity.unset_bits() > 0 {403Some(validity)404} else {405None406};407408(validity, values)409}410411/// Extends validity [`MutableBitmap`] and values [`MutableBitmap`] from an iterator of `Option`.412/// # Safety413/// The caller must ensure that `iterator` is `TrustedLen`.414#[inline]415pub(crate) unsafe fn extend_trusted_len_unzip<I, P>(416iterator: I,417validity: &mut MutableBitmap,418values: &mut MutableBitmap,419) where420P: std::borrow::Borrow<bool>,421I: Iterator<Item = Option<P>>,422{423let (_, upper) = iterator.size_hint();424let additional = upper.expect("extend_trusted_len_unzip requires an upper limit");425426// Length of the array before new values are pushed,427// variable created for assertion post operation428let pre_length = values.len();429430validity.reserve(additional);431values.reserve(additional);432433for item in iterator {434let item = if let Some(item) = item {435validity.push_unchecked(true);436*item.borrow()437} else {438validity.push_unchecked(false);439bool::default()440};441values.push_unchecked(item);442}443444debug_assert_eq!(445values.len(),446pre_length + additional,447"Trusted iterator length was not accurately reported"448);449}450451/// # Safety452/// The caller must ensure that `iterator` is `TrustedLen`.453#[inline]454pub(crate) unsafe fn try_trusted_len_unzip<E, I, P>(455iterator: I,456) -> std::result::Result<(MutableBitmap, MutableBitmap), E>457where458P: std::borrow::Borrow<bool>,459I: Iterator<Item = std::result::Result<Option<P>, E>>,460{461let (_, upper) = iterator.size_hint();462let len = upper.expect("trusted_len_unzip requires an upper limit");463464let mut null = MutableBitmap::with_capacity(len);465let mut values = MutableBitmap::with_capacity(len);466467for item in iterator {468let item = if let Some(item) = item? {469null.push(true);470*item.borrow()471} else {472null.push(false);473false474};475values.push(item);476}477assert_eq!(478values.len(),479len,480"Trusted iterator length was not accurately reported"481);482values.set_len(len);483null.set_len(len);484485Ok((null, values))486}487488impl<Ptr: std::borrow::Borrow<Option<bool>>> FromIterator<Ptr> for MutableBooleanArray {489fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {490let iter = iter.into_iter();491let (lower, _) = iter.size_hint();492493let mut validity = MutableBitmap::with_capacity(lower);494495let values: MutableBitmap = iter496.map(|item| {497if let Some(a) = item.borrow() {498validity.push(true);499*a500} else {501validity.push(false);502false503}504})505.collect();506507let validity = if validity.unset_bits() > 0 {508Some(validity)509} else {510None511};512513MutableBooleanArray::try_new(ArrowDataType::Boolean, values, validity).unwrap()514}515}516517impl MutableArray for MutableBooleanArray {518fn len(&self) -> usize {519self.values.len()520}521522fn validity(&self) -> Option<&MutableBitmap> {523self.validity.as_ref()524}525526fn as_box(&mut self) -> Box<dyn Array> {527let array: BooleanArray = std::mem::take(self).into();528array.boxed()529}530531fn as_arc(&mut self) -> Arc<dyn Array> {532let array: BooleanArray = std::mem::take(self).into();533array.arced()534}535536fn dtype(&self) -> &ArrowDataType {537&self.dtype538}539540fn as_any(&self) -> &dyn std::any::Any {541self542}543544fn as_mut_any(&mut self) -> &mut dyn std::any::Any {545self546}547548#[inline]549fn push_null(&mut self) {550self.push(None)551}552553fn reserve(&mut self, additional: usize) {554self.reserve(additional)555}556557fn shrink_to_fit(&mut self) {558self.shrink_to_fit()559}560}561562impl Extend<Option<bool>> for MutableBooleanArray {563fn extend<I: IntoIterator<Item = Option<bool>>>(&mut self, iter: I) {564let iter = iter.into_iter();565self.reserve(iter.size_hint().0);566iter.for_each(|x| self.push(x))567}568}569570impl TryExtend<Option<bool>> for MutableBooleanArray {571/// This is infalible and is implemented for consistency with all other types572fn try_extend<I: IntoIterator<Item = Option<bool>>>(&mut self, iter: I) -> PolarsResult<()> {573self.extend(iter);574Ok(())575}576}577578impl TryPush<Option<bool>> for MutableBooleanArray {579/// This is infalible and is implemented for consistency with all other types580fn try_push(&mut self, item: Option<bool>) -> PolarsResult<()> {581self.push(item);582Ok(())583}584}585586impl PartialEq for MutableBooleanArray {587fn eq(&self, other: &Self) -> bool {588self.iter().eq(other.iter())589}590}591592impl TryExtendFromSelf for MutableBooleanArray {593fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {594extend_validity(self.len(), &mut self.validity, &other.validity);595596let slice = other.values.as_slice();597// SAFETY: invariant offset + length <= slice.len()598unsafe {599self.values600.extend_from_slice_unchecked(slice, 0, other.values.len());601}602Ok(())603}604}605606607