Path: blob/main/crates/polars-arrow/src/array/primitive/mod.rs
8406 views
use std::ops::Range;12use either::Either;3use polars_buffer::Buffer;4use polars_utils::float16::pf16;56use super::{Array, Splitable};7use crate::array::iterator::NonNullValuesIter;8use crate::bitmap::Bitmap;9use crate::bitmap::utils::{BitmapIter, ZipValidity};10use crate::datatypes::*;11use crate::trusted_len::TrustedLen;12use crate::types::{NativeType, days_ms, i256, months_days_ns};1314mod ffi;15pub(super) mod fmt;16mod from_natural;17pub mod iterator;18#[cfg(feature = "proptest")]19pub mod proptest;2021mod mutable;22pub use mutable::*;23mod builder;24pub use builder::*;25use polars_error::{PolarsResult, polars_bail};26use polars_utils::index::{Bounded, Indexable, NullCount};27use polars_utils::slice::SliceAble;2829/// A [`PrimitiveArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<T>>` where30/// T is [`NativeType`] (e.g. [`i32`]). It implements [`Array`].31///32/// One way to think about a [`PrimitiveArray`] is `(DataType, Arc<Vec<T>>, Option<Arc<Vec<u8>>>)`33/// where:34/// * the first item is the array's logical type35/// * the second is the immutable values36/// * the third is the immutable validity (whether a value is null or not as a bitmap).37///38/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].39/// # Example40/// ```41/// use polars_arrow::array::PrimitiveArray;42/// use polars_arrow::bitmap::Bitmap;43/// use polars_buffer::Buffer;44///45/// let array = PrimitiveArray::from([Some(1i32), None, Some(10)]);46/// assert_eq!(array.value(0), 1);47/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(&1i32), None, Some(&10)]);48/// assert_eq!(array.values_iter().copied().collect::<Vec<_>>(), vec![1, 0, 10]);49/// // the underlying representation50/// assert_eq!(array.values(), &Buffer::from(vec![1i32, 0, 10]));51/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));52///53/// ```54#[derive(Clone)]55pub struct PrimitiveArray<T: NativeType> {56dtype: ArrowDataType,57values: Buffer<T>,58validity: Option<Bitmap>,59}6061pub(super) fn check<T: NativeType>(62dtype: &ArrowDataType,63values: &[T],64validity_len: Option<usize>,65) -> PolarsResult<()> {66if validity_len.is_some_and(|len| len != values.len()) {67polars_bail!(ComputeError: "validity mask length must match the number of values")68}6970if dtype.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) {71polars_bail!(ComputeError: "PrimitiveArray can only be initialized with a DataType whose physical type is Primitive")72}73Ok(())74}7576impl<T: NativeType> PrimitiveArray<T> {77/// The canonical method to create a [`PrimitiveArray`] out of its internal components.78/// # Implementation79/// This function is `O(1)`.80///81/// # Errors82/// This function errors iff:83/// * The validity is not `None` and its length is different from `values`'s length84/// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`]85pub fn try_new(86dtype: ArrowDataType,87values: Buffer<T>,88validity: Option<Bitmap>,89) -> PolarsResult<Self> {90check(&dtype, &values, validity.as_ref().map(|v| v.len()))?;91Ok(Self {92dtype,93values,94validity,95})96}9798/// # Safety99/// Doesn't check invariants100pub unsafe fn new_unchecked(101dtype: ArrowDataType,102values: Buffer<T>,103validity: Option<Bitmap>,104) -> Self {105if cfg!(debug_assertions) {106check(&dtype, &values, validity.as_ref().map(|v| v.len())).unwrap();107}108109Self {110dtype,111values,112validity,113}114}115116/// Returns a new [`PrimitiveArray`] with a different logical type.117///118/// This function is useful to assign a different [`ArrowDataType`] to the array.119/// Used to change the arrays' logical type (see example).120/// # Example121/// ```122/// use polars_arrow::array::Int32Array;123/// use polars_arrow::datatypes::ArrowDataType;124///125/// let array = Int32Array::from(&[Some(1), None, Some(2)]).to(ArrowDataType::Date32);126/// assert_eq!(127/// format!("{:?}", array),128/// "Date32[1970-01-02, None, 1970-01-03]"129/// );130/// ```131/// # Panics132/// Panics iff the `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`]133#[inline]134#[must_use]135pub fn to(self, dtype: ArrowDataType) -> Self {136check(137&dtype,138&self.values,139self.validity.as_ref().map(|v| v.len()),140)141.unwrap();142Self {143dtype,144values: self.values,145validity: self.validity,146}147}148149/// Creates a (non-null) [`PrimitiveArray`] from a vector of values.150/// This function is `O(1)`.151/// # Examples152/// ```153/// use polars_arrow::array::PrimitiveArray;154///155/// let array = PrimitiveArray::from_vec(vec![1, 2, 3]);156/// assert_eq!(format!("{:?}", array), "Int32[1, 2, 3]");157/// ```158pub fn from_vec(values: Vec<T>) -> Self {159Self::new(T::PRIMITIVE.into(), values.into(), None)160}161162/// Returns an iterator over the values and validity, `Option<&T>`.163#[inline]164pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter<'_, T>, BitmapIter<'_>> {165ZipValidity::new_with_validity(self.values().iter(), self.validity())166}167168/// Returns an iterator of the values, `&T`, ignoring the arrays' validity.169#[inline]170pub fn values_iter(&self) -> std::slice::Iter<'_, T> {171self.values().iter()172}173174/// Returns an iterator of the non-null values `T`.175#[inline]176pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, [T]> {177NonNullValuesIter::new(self.values(), self.validity())178}179180/// Returns the length of this array181#[inline]182pub fn len(&self) -> usize {183self.values.len()184}185186/// The values [`Buffer`].187/// Values on null slots are undetermined (they can be anything).188#[inline]189pub fn values(&self) -> &Buffer<T> {190&self.values191}192193/// Returns the optional validity.194#[inline]195pub fn validity(&self) -> Option<&Bitmap> {196self.validity.as_ref()197}198199/// Returns the arrays' [`ArrowDataType`].200#[inline]201pub fn dtype(&self) -> &ArrowDataType {202&self.dtype203}204205/// Returns the value at slot `i`.206///207/// Equivalent to `self.values()[i]`. The value of a null slot is undetermined (it can be anything).208/// # Panic209/// This function panics iff `i >= self.len`.210#[inline]211pub fn value(&self, i: usize) -> T {212self.values[i]213}214215/// Returns the value at index `i`.216/// The value on null slots is undetermined (it can be anything).217///218/// # Safety219/// Caller must be sure that `i < self.len()`220#[inline]221pub unsafe fn value_unchecked(&self, i: usize) -> T {222*self.values.get_unchecked(i)223}224225// /// Returns the element at index `i` or `None` if it is null226// /// # Panics227// /// iff `i >= self.len()`228// #[inline]229// pub fn get(&self, i: usize) -> Option<T> {230// if !self.is_null(i) {231// // soundness: Array::is_null panics if i >= self.len232// unsafe { Some(self.value_unchecked(i)) }233// } else {234// None235// }236// }237238/// Slices this [`PrimitiveArray`] by an offset and length.239/// # Implementation240/// This operation is `O(1)`.241#[inline]242pub fn slice(&mut self, offset: usize, length: usize) {243assert!(244offset + length <= self.len(),245"offset + length may not exceed length of array"246);247unsafe { self.slice_unchecked(offset, length) }248}249250/// Slices this [`PrimitiveArray`] by an offset and length.251/// # Implementation252/// This operation is `O(1)`.253///254/// # Safety255/// The caller must ensure that `offset + length <= self.len()`.256#[inline]257pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {258self.validity = self259.validity260.take()261.map(|bitmap| bitmap.sliced_unchecked(offset, length))262.filter(|bitmap| bitmap.unset_bits() > 0);263self.values264.slice_in_place_unchecked(offset..offset + length);265}266267impl_sliced!();268impl_mut_validity!();269impl_into_array!();270271/// Returns this [`PrimitiveArray`] with new values.272/// # Panics273/// This function panics iff `values.len() != self.len()`.274#[must_use]275pub fn with_values(mut self, values: Buffer<T>) -> Self {276self.set_values(values);277self278}279280/// Update the values of this [`PrimitiveArray`].281/// # Panics282/// This function panics iff `values.len() != self.len()`.283pub fn set_values(&mut self, values: Buffer<T>) {284assert_eq!(285values.len(),286self.len(),287"values' length must be equal to this arrays' length"288);289self.values = values;290}291292/// Applies a function `f` to the validity of this array.293///294/// # Panics295/// This function panics if the function `f` modifies the length of the [`Bitmap`].296pub fn apply_validity<F: FnOnce(Bitmap) -> Bitmap>(&mut self, f: F) {297if let Some(validity) = std::mem::take(&mut self.validity) {298self.set_validity(Some(f(validity)))299}300}301302/// Applies a function `f` to the values of this array, ignoring validity,303/// in-place if possible.304pub fn with_values_mut<F: FnOnce(&mut [T])>(&mut self, f: F) {305if let Some(slice) = self.values.get_mut_slice() {306f(slice)307} else {308let mut values = self.values.as_slice().to_vec();309f(&mut values);310self.values = Buffer::from(values);311}312}313314/// Returns an option of a mutable reference to the values of this [`PrimitiveArray`].315pub fn get_mut_values(&mut self) -> Option<&mut [T]> {316self.values.get_mut_slice()317}318319/// Returns its internal representation320#[must_use]321pub fn into_inner(self) -> (ArrowDataType, Buffer<T>, Option<Bitmap>) {322let Self {323dtype,324values,325validity,326} = self;327(dtype, values, validity)328}329330/// Creates a [`PrimitiveArray`] from its internal representation.331/// This is the inverted from [`PrimitiveArray::into_inner`]332pub fn from_inner(333dtype: ArrowDataType,334values: Buffer<T>,335validity: Option<Bitmap>,336) -> PolarsResult<Self> {337check(&dtype, &values, validity.as_ref().map(|v| v.len()))?;338Ok(unsafe { Self::from_inner_unchecked(dtype, values, validity) })339}340341/// Creates a [`PrimitiveArray`] from its internal representation.342/// This is the inverted from [`PrimitiveArray::into_inner`]343///344/// # Safety345/// Callers must ensure all invariants of this struct are upheld.346pub unsafe fn from_inner_unchecked(347dtype: ArrowDataType,348values: Buffer<T>,349validity: Option<Bitmap>,350) -> Self {351Self {352dtype,353values,354validity,355}356}357358/// Try to convert this [`PrimitiveArray`] to a [`MutablePrimitiveArray`] via copy-on-write semantics.359///360/// A [`PrimitiveArray`] is backed by a [`Buffer`] and [`Bitmap`] which are essentially `Arc<Vec<_>>`.361/// This function returns a [`MutablePrimitiveArray`] (via [`std::sync::Arc::get_mut`]) iff both values362/// and validity have not been cloned / are unique references to their underlying vectors.363///364/// This function is primarily used to reuse memory regions.365#[must_use]366pub fn into_mut(self) -> Either<Self, MutablePrimitiveArray<T>> {367use Either::*;368369if let Some(bitmap) = self.validity {370match bitmap.into_mut() {371Left(bitmap) => Left(PrimitiveArray::new(self.dtype, self.values, Some(bitmap))),372Right(mutable_bitmap) => match self.values.into_mut() {373Right(values) => Right(374MutablePrimitiveArray::try_new(self.dtype, values, Some(mutable_bitmap))375.unwrap(),376),377Left(values) => Left(PrimitiveArray::new(378self.dtype,379values,380Some(mutable_bitmap.into()),381)),382},383}384} else {385match self.values.into_mut() {386Right(values) => {387Right(MutablePrimitiveArray::try_new(self.dtype, values, None).unwrap())388},389Left(values) => Left(PrimitiveArray::new(self.dtype, values, None)),390}391}392}393394/// Returns a new empty (zero-length) [`PrimitiveArray`].395pub fn new_empty(dtype: ArrowDataType) -> Self {396Self::new(dtype, Buffer::new(), None)397}398399/// Returns a new [`PrimitiveArray`] where all slots are null / `None`.400#[inline]401pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {402Self::new(403dtype,404vec![T::default(); length].into(),405Some(Bitmap::new_zeroed(length)),406)407}408409/// Creates a (non-null) [`PrimitiveArray`] from an iterator of values.410/// # Implementation411/// This does not assume that the iterator has a known length.412pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {413Self::new(T::PRIMITIVE.into(), Vec::<T>::from_iter(iter).into(), None)414}415416/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.417/// # Implementation418/// This is essentially a memcopy and is thus `O(N)`419pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {420Self::new(421T::PRIMITIVE.into(),422Vec::<T>::from(slice.as_ref()).into(),423None,424)425}426427/// Calls f with a [`PrimitiveArray`] backed by this slice.428///429/// Aborts if any clones of the [`PrimitiveArray`] still live when `f` returns.430pub fn with_slice<R, F: FnOnce(PrimitiveArray<T>) -> R>(slice: &[T], f: F) -> R {431Buffer::with_slice(slice, |buf| f(Self::new(T::PRIMITIVE.into(), buf, None)))432}433434/// Creates a (non-null) [`PrimitiveArray`] from a [`TrustedLen`] of values.435/// # Implementation436/// This does not assume that the iterator has a known length.437pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {438MutablePrimitiveArray::<T>::from_trusted_len_values_iter(iter).into()439}440441/// Creates a new [`PrimitiveArray`] from an iterator over values442///443/// # Safety444/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).445/// I.e. that `size_hint().1` correctly reports its length.446pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {447MutablePrimitiveArray::<T>::from_trusted_len_values_iter_unchecked(iter).into()448}449450/// Creates a [`PrimitiveArray`] from a [`TrustedLen`] of optional values.451pub fn from_trusted_len_iter<I: TrustedLen<Item = Option<T>>>(iter: I) -> Self {452MutablePrimitiveArray::<T>::from_trusted_len_iter(iter).into()453}454455/// Creates a [`PrimitiveArray`] from an iterator of optional values.456///457/// # Safety458/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).459/// I.e. that `size_hint().1` correctly reports its length.460pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = Option<T>>>(iter: I) -> Self {461MutablePrimitiveArray::<T>::from_trusted_len_iter_unchecked(iter).into()462}463464/// Alias for `Self::try_new(..).unwrap()`.465/// # Panics466/// This function errors iff:467/// * The validity is not `None` and its length is different from `values`'s length468/// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive`].469pub fn new(dtype: ArrowDataType, values: Buffer<T>, validity: Option<Bitmap>) -> Self {470Self::try_new(dtype, values, validity).unwrap()471}472473/// Transmute this PrimitiveArray into another PrimitiveArray.474///475/// T and U must have the same size and alignment.476pub fn transmute<U: NativeType>(self) -> PrimitiveArray<U> {477let PrimitiveArray {478values, validity, ..479} = self;480PrimitiveArray::new(481U::PRIMITIVE.into(),482Buffer::try_transmute::<U>(values).unwrap(),483validity,484)485}486487/// Fills this entire array with the given value, leaving the validity mask intact.488///489/// Reuses the memory of the PrimitiveArray if possible.490pub fn fill_with(mut self, value: T) -> Self {491if let Some(values) = self.get_mut_values() {492for x in values.iter_mut() {493*x = value;494}495self496} else {497let values = vec![value; self.len()];498Self::new(T::PRIMITIVE.into(), values.into(), self.validity)499}500}501}502503impl<T: NativeType> Array for PrimitiveArray<T> {504impl_common_array!();505506fn validity(&self) -> Option<&Bitmap> {507self.validity.as_ref()508}509510#[inline]511fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {512Box::new(self.clone().with_validity(validity))513}514}515516impl<T: NativeType> Splitable for PrimitiveArray<T> {517#[inline(always)]518fn check_bound(&self, offset: usize) -> bool {519offset <= self.len()520}521522unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {523let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };524let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };525526(527Self {528dtype: self.dtype.clone(),529values: lhs_values,530validity: lhs_validity,531},532Self {533dtype: self.dtype.clone(),534values: rhs_values,535validity: rhs_validity,536},537)538}539}540541impl<T: NativeType> SliceAble for PrimitiveArray<T> {542unsafe fn slice_unchecked(&self, range: Range<usize>) -> Self {543self.clone().sliced_unchecked(range.start, range.len())544}545546fn slice(&self, range: Range<usize>) -> Self {547self.clone().sliced(range.start, range.len())548}549}550551impl<T: NativeType> Indexable for PrimitiveArray<T> {552type Item = Option<T>;553554fn get(&self, i: usize) -> Self::Item {555if !self.is_null(i) {556// soundness: Array::is_null panics if i >= self.len557unsafe { Some(self.value_unchecked(i)) }558} else {559None560}561}562563unsafe fn get_unchecked(&self, i: usize) -> Self::Item {564if !self.is_null_unchecked(i) {565Some(self.value_unchecked(i))566} else {567None568}569}570}571572/// A type definition [`PrimitiveArray`] for `i8`573pub type Int8Array = PrimitiveArray<i8>;574/// A type definition [`PrimitiveArray`] for `i16`575pub type Int16Array = PrimitiveArray<i16>;576/// A type definition [`PrimitiveArray`] for `i32`577pub type Int32Array = PrimitiveArray<i32>;578/// A type definition [`PrimitiveArray`] for `i64`579pub type Int64Array = PrimitiveArray<i64>;580/// A type definition [`PrimitiveArray`] for `i128`581pub type Int128Array = PrimitiveArray<i128>;582/// A type definition [`PrimitiveArray`] for `i256`583pub type Int256Array = PrimitiveArray<i256>;584/// A type definition [`PrimitiveArray`] for [`days_ms`]585pub type DaysMsArray = PrimitiveArray<days_ms>;586/// A type definition [`PrimitiveArray`] for [`months_days_ns`]587pub type MonthsDaysNsArray = PrimitiveArray<months_days_ns>;588/// A type definition [`PrimitiveArray`] for `f16`589pub type Float16Array = PrimitiveArray<pf16>;590/// A type definition [`PrimitiveArray`] for `f32`591pub type Float32Array = PrimitiveArray<f32>;592/// A type definition [`PrimitiveArray`] for `f64`593pub type Float64Array = PrimitiveArray<f64>;594/// A type definition [`PrimitiveArray`] for `u8`595pub type UInt8Array = PrimitiveArray<u8>;596/// A type definition [`PrimitiveArray`] for `u16`597pub type UInt16Array = PrimitiveArray<u16>;598/// A type definition [`PrimitiveArray`] for `u32`599pub type UInt32Array = PrimitiveArray<u32>;600/// A type definition [`PrimitiveArray`] for `u64`601pub type UInt64Array = PrimitiveArray<u64>;602/// A type definition [`PrimitiveArray`] for `u128`603pub type UInt128Array = PrimitiveArray<u128>;604605/// A type definition [`MutablePrimitiveArray`] for `i8`606pub type Int8Vec = MutablePrimitiveArray<i8>;607/// A type definition [`MutablePrimitiveArray`] for `i16`608pub type Int16Vec = MutablePrimitiveArray<i16>;609/// A type definition [`MutablePrimitiveArray`] for `i32`610pub type Int32Vec = MutablePrimitiveArray<i32>;611/// A type definition [`MutablePrimitiveArray`] for `i64`612pub type Int64Vec = MutablePrimitiveArray<i64>;613/// A type definition [`MutablePrimitiveArray`] for `i128`614pub type Int128Vec = MutablePrimitiveArray<i128>;615/// A type definition [`MutablePrimitiveArray`] for `i256`616pub type Int256Vec = MutablePrimitiveArray<i256>;617/// A type definition [`MutablePrimitiveArray`] for [`days_ms`]618pub type DaysMsVec = MutablePrimitiveArray<days_ms>;619/// A type definition [`MutablePrimitiveArray`] for [`months_days_ns`]620pub type MonthsDaysNsVec = MutablePrimitiveArray<months_days_ns>;621/// A type definition [`MutablePrimitiveArray`] for `f16`622pub type Float16Vec = MutablePrimitiveArray<pf16>;623/// A type definition [`MutablePrimitiveArray`] for `f32`624pub type Float32Vec = MutablePrimitiveArray<f32>;625/// A type definition [`MutablePrimitiveArray`] for `f64`626pub type Float64Vec = MutablePrimitiveArray<f64>;627/// A type definition [`MutablePrimitiveArray`] for `u8`628pub type UInt8Vec = MutablePrimitiveArray<u8>;629/// A type definition [`MutablePrimitiveArray`] for `u16`630pub type UInt16Vec = MutablePrimitiveArray<u16>;631/// A type definition [`MutablePrimitiveArray`] for `u32`632pub type UInt32Vec = MutablePrimitiveArray<u32>;633/// A type definition [`MutablePrimitiveArray`] for `u64`634pub type UInt64Vec = MutablePrimitiveArray<u64>;635/// A type definition [`MutablePrimitiveArray`] for `u128`636pub type UInt128Vec = MutablePrimitiveArray<u128>;637638impl<T: NativeType> Default for PrimitiveArray<T> {639fn default() -> Self {640PrimitiveArray::new(T::PRIMITIVE.into(), Default::default(), None)641}642}643644impl<T: NativeType> Bounded for PrimitiveArray<T> {645fn len(&self) -> usize {646self.values.len()647}648}649650impl<T: NativeType> NullCount for PrimitiveArray<T> {651fn null_count(&self) -> usize {652<Self as Array>::null_count(self)653}654}655656657