Path: blob/main/crates/polars-arrow/src/array/boolean/mod.rs
6939 views
use either::Either;1use polars_error::{PolarsResult, polars_bail};23use super::{Array, Splitable};4use crate::array::iterator::NonNullValuesIter;5use crate::bitmap::utils::{BitmapIter, ZipValidity};6use crate::bitmap::{Bitmap, MutableBitmap};7use crate::compute::utils::{combine_validities_and, combine_validities_or};8use crate::datatypes::{ArrowDataType, PhysicalType};9use crate::trusted_len::TrustedLen;1011mod ffi;12pub(super) mod fmt;13mod from;14mod iterator;15mod mutable;16pub use mutable::*;17mod builder;18pub use builder::*;19#[cfg(feature = "proptest")]20pub mod proptest;2122/// A [`BooleanArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<bool>>`.23/// It implements [`Array`].24///25/// One way to think about a [`BooleanArray`] is `(DataType, Arc<Vec<u8>>, Option<Arc<Vec<u8>>>)`26/// where:27/// * the first item is the array's logical type28/// * the second is the immutable values29/// * the third is the immutable validity (whether a value is null or not as a bitmap).30///31/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].32/// # Example33/// ```34/// use polars_arrow::array::BooleanArray;35/// use polars_arrow::bitmap::Bitmap;36/// use polars_arrow::buffer::Buffer;37///38/// let array = BooleanArray::from([Some(true), None, Some(false)]);39/// assert_eq!(array.value(0), true);40/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(true), None, Some(false)]);41/// assert_eq!(array.values_iter().collect::<Vec<_>>(), vec![true, false, false]);42/// // the underlying representation43/// assert_eq!(array.values(), &Bitmap::from([true, false, false]));44/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));45///46/// ```47#[derive(Clone)]48pub struct BooleanArray {49dtype: ArrowDataType,50values: Bitmap,51validity: Option<Bitmap>,52}5354impl BooleanArray {55/// The canonical method to create a [`BooleanArray`] out of low-end APIs.56/// # Errors57/// This function errors iff:58/// * The validity is not `None` and its length is different from `values`'s length59/// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`].60pub fn try_new(61dtype: ArrowDataType,62values: Bitmap,63validity: Option<Bitmap>,64) -> PolarsResult<Self> {65if validity66.as_ref()67.is_some_and(|validity| validity.len() != values.len())68{69polars_bail!(ComputeError: "validity mask length must match the number of values")70}7172if dtype.to_physical_type() != PhysicalType::Boolean {73polars_bail!(ComputeError: "BooleanArray can only be initialized with a DataType whose physical type is Boolean")74}7576Ok(Self {77dtype,78values,79validity,80})81}8283/// Alias to `Self::try_new().unwrap()`84pub fn new(dtype: ArrowDataType, values: Bitmap, validity: Option<Bitmap>) -> Self {85Self::try_new(dtype, values, validity).unwrap()86}8788/// Returns an iterator over the optional values of this [`BooleanArray`].89#[inline]90pub fn iter(&self) -> ZipValidity<bool, BitmapIter<'_>, BitmapIter<'_>> {91ZipValidity::new_with_validity(self.values().iter(), self.validity())92}9394/// Returns an iterator over the values of this [`BooleanArray`].95#[inline]96pub fn values_iter(&self) -> BitmapIter<'_> {97self.values().iter()98}99100/// Returns an iterator of the non-null values.101#[inline]102pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, BooleanArray> {103NonNullValuesIter::new(self, self.validity())104}105106/// Returns the length of this array107#[inline]108pub fn len(&self) -> usize {109self.values.len()110}111112/// The values [`Bitmap`].113/// Values on null slots are undetermined (they can be anything).114#[inline]115pub fn values(&self) -> &Bitmap {116&self.values117}118119/// Returns the optional validity.120#[inline]121pub fn validity(&self) -> Option<&Bitmap> {122self.validity.as_ref()123}124125/// Returns the arrays' [`ArrowDataType`].126#[inline]127pub fn dtype(&self) -> &ArrowDataType {128&self.dtype129}130131/// Returns the value at index `i`132/// # Panic133/// This function panics iff `i >= self.len()`.134#[inline]135pub fn value(&self, i: usize) -> bool {136self.values.get_bit(i)137}138139/// Returns the element at index `i` as bool140///141/// # Safety142/// Caller must be sure that `i < self.len()`143#[inline]144pub unsafe fn value_unchecked(&self, i: usize) -> bool {145self.values.get_bit_unchecked(i)146}147148/// Returns the element at index `i` or `None` if it is null149/// # Panics150/// iff `i >= self.len()`151#[inline]152pub fn get(&self, i: usize) -> Option<bool> {153if !self.is_null(i) {154// soundness: Array::is_null panics if i >= self.len155unsafe { Some(self.value_unchecked(i)) }156} else {157None158}159}160161/// Slices this [`BooleanArray`].162/// # Implementation163/// This operation is `O(1)` as it amounts to increase up to two ref counts.164/// # Panic165/// This function panics iff `offset + length > self.len()`.166#[inline]167pub fn slice(&mut self, offset: usize, length: usize) {168assert!(169offset + length <= self.len(),170"the offset of the new Buffer cannot exceed the existing length"171);172unsafe { self.slice_unchecked(offset, length) }173}174175/// Slices this [`BooleanArray`].176/// # Implementation177/// This operation is `O(1)` as it amounts to increase two ref counts.178///179/// # Safety180/// The caller must ensure that `offset + length <= self.len()`.181#[inline]182pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {183self.validity = self184.validity185.take()186.map(|bitmap| bitmap.sliced_unchecked(offset, length))187.filter(|bitmap| bitmap.unset_bits() > 0);188self.values.slice_unchecked(offset, length);189}190191impl_sliced!();192impl_mut_validity!();193impl_into_array!();194195/// Returns a clone of this [`BooleanArray`] with new values.196/// # Panics197/// This function panics iff `values.len() != self.len()`.198#[must_use]199pub fn with_values(&self, values: Bitmap) -> Self {200let mut out = self.clone();201out.set_values(values);202out203}204205/// Sets the values of this [`BooleanArray`].206/// # Panics207/// This function panics iff `values.len() != self.len()`.208pub fn set_values(&mut self, values: Bitmap) {209assert_eq!(210values.len(),211self.len(),212"values length must be equal to this arrays length"213);214self.values = values;215}216217/// Applies a function `f` to the values of this array, cloning the values218/// iff they are being shared with others219///220/// This is an API to use clone-on-write221/// # Implementation222/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`223/// if it is being shared (since it results in a `O(N)` memcopy).224/// # Panics225/// This function panics if the function modifies the length of the [`MutableBitmap`].226pub fn apply_values_mut<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {227let values = std::mem::take(&mut self.values);228let mut values = values.make_mut();229f(&mut values);230if let Some(validity) = &self.validity {231assert_eq!(validity.len(), values.len());232}233self.values = values.into();234}235236/// Try to convert this [`BooleanArray`] to a [`MutableBooleanArray`]237pub fn into_mut(self) -> Either<Self, MutableBooleanArray> {238use Either::*;239240if let Some(bitmap) = self.validity {241match bitmap.into_mut() {242Left(bitmap) => Left(BooleanArray::new(self.dtype, self.values, Some(bitmap))),243Right(mutable_bitmap) => match self.values.into_mut() {244Left(immutable) => Left(BooleanArray::new(245self.dtype,246immutable,247Some(mutable_bitmap.into()),248)),249Right(mutable) => Right(250MutableBooleanArray::try_new(self.dtype, mutable, Some(mutable_bitmap))251.unwrap(),252),253},254}255} else {256match self.values.into_mut() {257Left(immutable) => Left(BooleanArray::new(self.dtype, immutable, None)),258Right(mutable) => {259Right(MutableBooleanArray::try_new(self.dtype, mutable, None).unwrap())260},261}262}263}264265/// Returns a new empty [`BooleanArray`].266pub fn new_empty(dtype: ArrowDataType) -> Self {267Self::new(dtype, Bitmap::new(), None)268}269270/// Returns a new [`BooleanArray`] whose all slots are null / `None`.271pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {272let bitmap = Bitmap::new_zeroed(length);273Self::new(dtype, bitmap.clone(), Some(bitmap))274}275276/// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.277#[inline]278pub fn from_trusted_len_values_iter<I: TrustedLen<Item = bool>>(iterator: I) -> Self {279MutableBooleanArray::from_trusted_len_values_iter(iterator).into()280}281282/// Creates a new [`BooleanArray`] from an [`TrustedLen`] of `bool`.283/// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len284/// but this crate does not mark it as such.285///286/// # Safety287/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).288/// I.e. that `size_hint().1` correctly reports its length.289#[inline]290pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = bool>>(291iterator: I,292) -> Self {293MutableBooleanArray::from_trusted_len_values_iter_unchecked(iterator).into()294}295296/// Creates a new [`BooleanArray`] from a slice of `bool`.297#[inline]298pub fn from_slice<P: AsRef<[bool]>>(slice: P) -> Self {299MutableBooleanArray::from_slice(slice).into()300}301302/// Creates a [`BooleanArray`] from an iterator of trusted length.303/// Use this over [`BooleanArray::from_trusted_len_iter`] when the iterator is trusted len304/// but this crate does not mark it as such.305///306/// # Safety307/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).308/// I.e. that `size_hint().1` correctly reports its length.309#[inline]310pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self311where312P: std::borrow::Borrow<bool>,313I: Iterator<Item = Option<P>>,314{315MutableBooleanArray::from_trusted_len_iter_unchecked(iterator).into()316}317318/// Creates a [`BooleanArray`] from a [`TrustedLen`].319#[inline]320pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self321where322P: std::borrow::Borrow<bool>,323I: TrustedLen<Item = Option<P>>,324{325MutableBooleanArray::from_trusted_len_iter(iterator).into()326}327328/// Creates a [`BooleanArray`] from an falible iterator of trusted length.329///330/// # Safety331/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).332/// I.e. that `size_hint().1` correctly reports its length.333#[inline]334pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(iterator: I) -> Result<Self, E>335where336P: std::borrow::Borrow<bool>,337I: Iterator<Item = Result<Option<P>, E>>,338{339Ok(MutableBooleanArray::try_from_trusted_len_iter_unchecked(iterator)?.into())340}341342/// Creates a [`BooleanArray`] from a [`TrustedLen`].343#[inline]344pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> Result<Self, E>345where346P: std::borrow::Borrow<bool>,347I: TrustedLen<Item = Result<Option<P>, E>>,348{349Ok(MutableBooleanArray::try_from_trusted_len_iter(iterator)?.into())350}351352pub fn true_and_valid(&self) -> Bitmap {353match &self.validity {354None => self.values.clone(),355Some(validity) => combine_validities_and(Some(&self.values), Some(validity)).unwrap(),356}357}358359pub fn true_or_valid(&self) -> Bitmap {360match &self.validity {361None => self.values.clone(),362Some(validity) => combine_validities_or(Some(&self.values), Some(validity)).unwrap(),363}364}365366/// Returns its internal representation367#[must_use]368pub fn into_inner(self) -> (ArrowDataType, Bitmap, Option<Bitmap>) {369let Self {370dtype,371values,372validity,373} = self;374(dtype, values, validity)375}376377/// Creates a [`BooleanArray`] from its internal representation.378/// This is the inverted from [`BooleanArray::into_inner`]379///380/// # Safety381/// Callers must ensure all invariants of this struct are upheld.382pub unsafe fn from_inner_unchecked(383dtype: ArrowDataType,384values: Bitmap,385validity: Option<Bitmap>,386) -> Self {387Self {388dtype,389values,390validity,391}392}393}394395impl Array for BooleanArray {396impl_common_array!();397398fn validity(&self) -> Option<&Bitmap> {399self.validity.as_ref()400}401402#[inline]403fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {404Box::new(self.clone().with_validity(validity))405}406}407408impl Splitable for BooleanArray {409fn check_bound(&self, offset: usize) -> bool {410offset <= self.len()411}412413unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {414let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };415let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };416417(418Self {419dtype: self.dtype.clone(),420values: lhs_values,421validity: lhs_validity,422},423Self {424dtype: self.dtype.clone(),425values: rhs_values,426validity: rhs_validity,427},428)429}430}431432impl From<Bitmap> for BooleanArray {433fn from(values: Bitmap) -> Self {434Self {435dtype: ArrowDataType::Boolean,436values,437validity: None,438}439}440}441442443