Path: blob/main/crates/polars-arrow/src/array/primitive/mod.rs
6939 views
use std::ops::Range;12use either::Either;34use super::{Array, Splitable};5use crate::array::iterator::NonNullValuesIter;6use crate::bitmap::Bitmap;7use crate::bitmap::utils::{BitmapIter, ZipValidity};8use crate::buffer::Buffer;9use crate::datatypes::*;10use crate::trusted_len::TrustedLen;11use crate::types::{NativeType, days_ms, f16, i256, months_days_ns};1213mod ffi;14pub(super) mod fmt;15mod from_natural;16pub mod iterator;17#[cfg(feature = "proptest")]18pub mod proptest;1920mod mutable;21pub use mutable::*;22mod builder;23pub use builder::*;24use polars_error::{PolarsResult, polars_bail};25use polars_utils::index::{Bounded, Indexable, NullCount};26use polars_utils::slice::SliceAble;2728/// A [`PrimitiveArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<T>>` where29/// T is [`NativeType`] (e.g. [`i32`]). It implements [`Array`].30///31/// One way to think about a [`PrimitiveArray`] is `(DataType, Arc<Vec<T>>, Option<Arc<Vec<u8>>>)`32/// where:33/// * the first item is the array's logical type34/// * the second is the immutable values35/// * the third is the immutable validity (whether a value is null or not as a bitmap).36///37/// The size of this struct is `O(1)`, as all data is stored behind an [`std::sync::Arc`].38/// # Example39/// ```40/// use polars_arrow::array::PrimitiveArray;41/// use polars_arrow::bitmap::Bitmap;42/// use polars_arrow::buffer::Buffer;43///44/// let array = PrimitiveArray::from([Some(1i32), None, Some(10)]);45/// assert_eq!(array.value(0), 1);46/// assert_eq!(array.iter().collect::<Vec<_>>(), vec![Some(&1i32), None, Some(&10)]);47/// assert_eq!(array.values_iter().copied().collect::<Vec<_>>(), vec![1, 0, 10]);48/// // the underlying representation49/// assert_eq!(array.values(), &Buffer::from(vec![1i32, 0, 10]));50/// assert_eq!(array.validity(), Some(&Bitmap::from([true, false, true])));51///52/// ```53#[derive(Clone)]54pub struct PrimitiveArray<T: NativeType> {55dtype: ArrowDataType,56values: Buffer<T>,57validity: Option<Bitmap>,58}5960pub(super) fn check<T: NativeType>(61dtype: &ArrowDataType,62values: &[T],63validity_len: Option<usize>,64) -> PolarsResult<()> {65if validity_len.is_some_and(|len| len != values.len()) {66polars_bail!(ComputeError: "validity mask length must match the number of values")67}6869if dtype.to_physical_type() != PhysicalType::Primitive(T::PRIMITIVE) {70polars_bail!(ComputeError: "PrimitiveArray can only be initialized with a DataType whose physical type is Primitive")71}72Ok(())73}7475impl<T: NativeType> PrimitiveArray<T> {76/// The canonical method to create a [`PrimitiveArray`] out of its internal components.77/// # Implementation78/// This function is `O(1)`.79///80/// # Errors81/// This function errors iff:82/// * The validity is not `None` and its length is different from `values`'s length83/// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`]84pub fn try_new(85dtype: ArrowDataType,86values: Buffer<T>,87validity: Option<Bitmap>,88) -> PolarsResult<Self> {89check(&dtype, &values, validity.as_ref().map(|v| v.len()))?;90Ok(Self {91dtype,92values,93validity,94})95}9697/// # Safety98/// Doesn't check invariants99pub unsafe fn new_unchecked(100dtype: ArrowDataType,101values: Buffer<T>,102validity: Option<Bitmap>,103) -> Self {104if cfg!(debug_assertions) {105check(&dtype, &values, validity.as_ref().map(|v| v.len())).unwrap();106}107108Self {109dtype,110values,111validity,112}113}114115/// Returns a new [`PrimitiveArray`] with a different logical type.116///117/// This function is useful to assign a different [`ArrowDataType`] to the array.118/// Used to change the arrays' logical type (see example).119/// # Example120/// ```121/// use polars_arrow::array::Int32Array;122/// use polars_arrow::datatypes::ArrowDataType;123///124/// let array = Int32Array::from(&[Some(1), None, Some(2)]).to(ArrowDataType::Date32);125/// assert_eq!(126/// format!("{:?}", array),127/// "Date32[1970-01-02, None, 1970-01-03]"128/// );129/// ```130/// # Panics131/// Panics iff the `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive(T::PRIMITIVE)`]132#[inline]133#[must_use]134pub fn to(self, dtype: ArrowDataType) -> Self {135check(136&dtype,137&self.values,138self.validity.as_ref().map(|v| v.len()),139)140.unwrap();141Self {142dtype,143values: self.values,144validity: self.validity,145}146}147148/// Creates a (non-null) [`PrimitiveArray`] from a vector of values.149/// This function is `O(1)`.150/// # Examples151/// ```152/// use polars_arrow::array::PrimitiveArray;153///154/// let array = PrimitiveArray::from_vec(vec![1, 2, 3]);155/// assert_eq!(format!("{:?}", array), "Int32[1, 2, 3]");156/// ```157pub fn from_vec(values: Vec<T>) -> Self {158Self::new(T::PRIMITIVE.into(), values.into(), None)159}160161/// Returns an iterator over the values and validity, `Option<&T>`.162#[inline]163pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter<'_, T>, BitmapIter<'_>> {164ZipValidity::new_with_validity(self.values().iter(), self.validity())165}166167/// Returns an iterator of the values, `&T`, ignoring the arrays' validity.168#[inline]169pub fn values_iter(&self) -> std::slice::Iter<'_, T> {170self.values().iter()171}172173/// Returns an iterator of the non-null values `T`.174#[inline]175pub fn non_null_values_iter(&self) -> NonNullValuesIter<'_, [T]> {176NonNullValuesIter::new(self.values(), self.validity())177}178179/// Returns the length of this array180#[inline]181pub fn len(&self) -> usize {182self.values.len()183}184185/// The values [`Buffer`].186/// Values on null slots are undetermined (they can be anything).187#[inline]188pub fn values(&self) -> &Buffer<T> {189&self.values190}191192/// Returns the optional validity.193#[inline]194pub fn validity(&self) -> Option<&Bitmap> {195self.validity.as_ref()196}197198/// Returns the arrays' [`ArrowDataType`].199#[inline]200pub fn dtype(&self) -> &ArrowDataType {201&self.dtype202}203204/// Returns the value at slot `i`.205///206/// Equivalent to `self.values()[i]`. The value of a null slot is undetermined (it can be anything).207/// # Panic208/// This function panics iff `i >= self.len`.209#[inline]210pub fn value(&self, i: usize) -> T {211self.values[i]212}213214/// Returns the value at index `i`.215/// The value on null slots is undetermined (it can be anything).216///217/// # Safety218/// Caller must be sure that `i < self.len()`219#[inline]220pub unsafe fn value_unchecked(&self, i: usize) -> T {221*self.values.get_unchecked(i)222}223224// /// Returns the element at index `i` or `None` if it is null225// /// # Panics226// /// iff `i >= self.len()`227// #[inline]228// pub fn get(&self, i: usize) -> Option<T> {229// if !self.is_null(i) {230// // soundness: Array::is_null panics if i >= self.len231// unsafe { Some(self.value_unchecked(i)) }232// } else {233// None234// }235// }236237/// Slices this [`PrimitiveArray`] by an offset and length.238/// # Implementation239/// This operation is `O(1)`.240#[inline]241pub fn slice(&mut self, offset: usize, length: usize) {242assert!(243offset + length <= self.len(),244"offset + length may not exceed length of array"245);246unsafe { self.slice_unchecked(offset, length) }247}248249/// Slices this [`PrimitiveArray`] by an offset and length.250/// # Implementation251/// This operation is `O(1)`.252///253/// # Safety254/// The caller must ensure that `offset + length <= self.len()`.255#[inline]256pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {257self.validity = self258.validity259.take()260.map(|bitmap| bitmap.sliced_unchecked(offset, length))261.filter(|bitmap| bitmap.unset_bits() > 0);262self.values.slice_unchecked(offset, length);263}264265impl_sliced!();266impl_mut_validity!();267impl_into_array!();268269/// Returns this [`PrimitiveArray`] with new values.270/// # Panics271/// This function panics iff `values.len() != self.len()`.272#[must_use]273pub fn with_values(mut self, values: Buffer<T>) -> Self {274self.set_values(values);275self276}277278/// Update the values of this [`PrimitiveArray`].279/// # Panics280/// This function panics iff `values.len() != self.len()`.281pub fn set_values(&mut self, values: Buffer<T>) {282assert_eq!(283values.len(),284self.len(),285"values' length must be equal to this arrays' length"286);287self.values = values;288}289290/// Applies a function `f` to the validity of this array.291///292/// This is an API to leverage clone-on-write293/// # Panics294/// This function panics if the function `f` modifies the length of the [`Bitmap`].295pub fn apply_validity<F: FnOnce(Bitmap) -> Bitmap>(&mut self, f: F) {296if let Some(validity) = std::mem::take(&mut self.validity) {297self.set_validity(Some(f(validity)))298}299}300301/// Returns an option of a mutable reference to the values of this [`PrimitiveArray`].302pub fn get_mut_values(&mut self) -> Option<&mut [T]> {303self.values.get_mut_slice()304}305306/// Returns its internal representation307#[must_use]308pub fn into_inner(self) -> (ArrowDataType, Buffer<T>, Option<Bitmap>) {309let Self {310dtype,311values,312validity,313} = self;314(dtype, values, validity)315}316317/// Creates a [`PrimitiveArray`] from its internal representation.318/// This is the inverted from [`PrimitiveArray::into_inner`]319pub fn from_inner(320dtype: ArrowDataType,321values: Buffer<T>,322validity: Option<Bitmap>,323) -> PolarsResult<Self> {324check(&dtype, &values, validity.as_ref().map(|v| v.len()))?;325Ok(unsafe { Self::from_inner_unchecked(dtype, values, validity) })326}327328/// Creates a [`PrimitiveArray`] from its internal representation.329/// This is the inverted from [`PrimitiveArray::into_inner`]330///331/// # Safety332/// Callers must ensure all invariants of this struct are upheld.333pub unsafe fn from_inner_unchecked(334dtype: ArrowDataType,335values: Buffer<T>,336validity: Option<Bitmap>,337) -> Self {338Self {339dtype,340values,341validity,342}343}344345/// Try to convert this [`PrimitiveArray`] to a [`MutablePrimitiveArray`] via copy-on-write semantics.346///347/// A [`PrimitiveArray`] is backed by a [`Buffer`] and [`Bitmap`] which are essentially `Arc<Vec<_>>`.348/// This function returns a [`MutablePrimitiveArray`] (via [`std::sync::Arc::get_mut`]) iff both values349/// and validity have not been cloned / are unique references to their underlying vectors.350///351/// This function is primarily used to reuse memory regions.352#[must_use]353pub fn into_mut(self) -> Either<Self, MutablePrimitiveArray<T>> {354use Either::*;355356if let Some(bitmap) = self.validity {357match bitmap.into_mut() {358Left(bitmap) => Left(PrimitiveArray::new(self.dtype, self.values, Some(bitmap))),359Right(mutable_bitmap) => match self.values.into_mut() {360Right(values) => Right(361MutablePrimitiveArray::try_new(self.dtype, values, Some(mutable_bitmap))362.unwrap(),363),364Left(values) => Left(PrimitiveArray::new(365self.dtype,366values,367Some(mutable_bitmap.into()),368)),369},370}371} else {372match self.values.into_mut() {373Right(values) => {374Right(MutablePrimitiveArray::try_new(self.dtype, values, None).unwrap())375},376Left(values) => Left(PrimitiveArray::new(self.dtype, values, None)),377}378}379}380381/// Returns a new empty (zero-length) [`PrimitiveArray`].382pub fn new_empty(dtype: ArrowDataType) -> Self {383Self::new(dtype, Buffer::new(), None)384}385386/// Returns a new [`PrimitiveArray`] where all slots are null / `None`.387#[inline]388pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {389Self::new(390dtype,391vec![T::default(); length].into(),392Some(Bitmap::new_zeroed(length)),393)394}395396/// Creates a (non-null) [`PrimitiveArray`] from an iterator of values.397/// # Implementation398/// This does not assume that the iterator has a known length.399pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {400Self::new(T::PRIMITIVE.into(), Vec::<T>::from_iter(iter).into(), None)401}402403/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.404/// # Implementation405/// This is essentially a memcopy and is thus `O(N)`406pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {407Self::new(408T::PRIMITIVE.into(),409Vec::<T>::from(slice.as_ref()).into(),410None,411)412}413414/// Creates a (non-null) [`PrimitiveArray`] from a [`TrustedLen`] of values.415/// # Implementation416/// This does not assume that the iterator has a known length.417pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {418MutablePrimitiveArray::<T>::from_trusted_len_values_iter(iter).into()419}420421/// Creates a new [`PrimitiveArray`] from an iterator over values422///423/// # Safety424/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).425/// I.e. that `size_hint().1` correctly reports its length.426pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {427MutablePrimitiveArray::<T>::from_trusted_len_values_iter_unchecked(iter).into()428}429430/// Creates a [`PrimitiveArray`] from a [`TrustedLen`] of optional values.431pub fn from_trusted_len_iter<I: TrustedLen<Item = Option<T>>>(iter: I) -> Self {432MutablePrimitiveArray::<T>::from_trusted_len_iter(iter).into()433}434435/// Creates a [`PrimitiveArray`] from an iterator of optional values.436///437/// # Safety438/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).439/// I.e. that `size_hint().1` correctly reports its length.440pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = Option<T>>>(iter: I) -> Self {441MutablePrimitiveArray::<T>::from_trusted_len_iter_unchecked(iter).into()442}443444/// Alias for `Self::try_new(..).unwrap()`.445/// # Panics446/// This function errors iff:447/// * The validity is not `None` and its length is different from `values`'s length448/// * The `dtype`'s [`PhysicalType`] is not equal to [`PhysicalType::Primitive`].449pub fn new(dtype: ArrowDataType, values: Buffer<T>, validity: Option<Bitmap>) -> Self {450Self::try_new(dtype, values, validity).unwrap()451}452453/// Transmute this PrimitiveArray into another PrimitiveArray.454///455/// T and U must have the same size and alignment.456pub fn transmute<U: NativeType>(self) -> PrimitiveArray<U> {457let PrimitiveArray {458values, validity, ..459} = self;460PrimitiveArray::new(461U::PRIMITIVE.into(),462Buffer::try_transmute::<U>(values).unwrap(),463validity,464)465}466467/// Fills this entire array with the given value, leaving the validity mask intact.468///469/// Reuses the memory of the PrimitiveArray if possible.470pub fn fill_with(mut self, value: T) -> Self {471if let Some(values) = self.get_mut_values() {472for x in values.iter_mut() {473*x = value;474}475self476} else {477let values = vec![value; self.len()];478Self::new(T::PRIMITIVE.into(), values.into(), self.validity)479}480}481}482483impl<T: NativeType> Array for PrimitiveArray<T> {484impl_common_array!();485486fn validity(&self) -> Option<&Bitmap> {487self.validity.as_ref()488}489490#[inline]491fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {492Box::new(self.clone().with_validity(validity))493}494}495496impl<T: NativeType> Splitable for PrimitiveArray<T> {497#[inline(always)]498fn check_bound(&self, offset: usize) -> bool {499offset <= self.len()500}501502unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {503let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };504let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };505506(507Self {508dtype: self.dtype.clone(),509values: lhs_values,510validity: lhs_validity,511},512Self {513dtype: self.dtype.clone(),514values: rhs_values,515validity: rhs_validity,516},517)518}519}520521impl<T: NativeType> SliceAble for PrimitiveArray<T> {522unsafe fn slice_unchecked(&self, range: Range<usize>) -> Self {523self.clone().sliced_unchecked(range.start, range.len())524}525526fn slice(&self, range: Range<usize>) -> Self {527self.clone().sliced(range.start, range.len())528}529}530531impl<T: NativeType> Indexable for PrimitiveArray<T> {532type Item = Option<T>;533534fn get(&self, i: usize) -> Self::Item {535if !self.is_null(i) {536// soundness: Array::is_null panics if i >= self.len537unsafe { Some(self.value_unchecked(i)) }538} else {539None540}541}542543unsafe fn get_unchecked(&self, i: usize) -> Self::Item {544if !self.is_null_unchecked(i) {545Some(self.value_unchecked(i))546} else {547None548}549}550}551552/// A type definition [`PrimitiveArray`] for `i8`553pub type Int8Array = PrimitiveArray<i8>;554/// A type definition [`PrimitiveArray`] for `i16`555pub type Int16Array = PrimitiveArray<i16>;556/// A type definition [`PrimitiveArray`] for `i32`557pub type Int32Array = PrimitiveArray<i32>;558/// A type definition [`PrimitiveArray`] for `i64`559pub type Int64Array = PrimitiveArray<i64>;560/// A type definition [`PrimitiveArray`] for `i128`561pub type Int128Array = PrimitiveArray<i128>;562/// A type definition [`PrimitiveArray`] for `i256`563pub type Int256Array = PrimitiveArray<i256>;564/// A type definition [`PrimitiveArray`] for [`days_ms`]565pub type DaysMsArray = PrimitiveArray<days_ms>;566/// A type definition [`PrimitiveArray`] for [`months_days_ns`]567pub type MonthsDaysNsArray = PrimitiveArray<months_days_ns>;568/// A type definition [`PrimitiveArray`] for `f16`569pub type Float16Array = PrimitiveArray<f16>;570/// A type definition [`PrimitiveArray`] for `f32`571pub type Float32Array = PrimitiveArray<f32>;572/// A type definition [`PrimitiveArray`] for `f64`573pub type Float64Array = PrimitiveArray<f64>;574/// A type definition [`PrimitiveArray`] for `u8`575pub type UInt8Array = PrimitiveArray<u8>;576/// A type definition [`PrimitiveArray`] for `u16`577pub type UInt16Array = PrimitiveArray<u16>;578/// A type definition [`PrimitiveArray`] for `u32`579pub type UInt32Array = PrimitiveArray<u32>;580/// A type definition [`PrimitiveArray`] for `u64`581pub type UInt64Array = PrimitiveArray<u64>;582583/// A type definition [`MutablePrimitiveArray`] for `i8`584pub type Int8Vec = MutablePrimitiveArray<i8>;585/// A type definition [`MutablePrimitiveArray`] for `i16`586pub type Int16Vec = MutablePrimitiveArray<i16>;587/// A type definition [`MutablePrimitiveArray`] for `i32`588pub type Int32Vec = MutablePrimitiveArray<i32>;589/// A type definition [`MutablePrimitiveArray`] for `i64`590pub type Int64Vec = MutablePrimitiveArray<i64>;591/// A type definition [`MutablePrimitiveArray`] for `i128`592pub type Int128Vec = MutablePrimitiveArray<i128>;593/// A type definition [`MutablePrimitiveArray`] for `i256`594pub type Int256Vec = MutablePrimitiveArray<i256>;595/// A type definition [`MutablePrimitiveArray`] for [`days_ms`]596pub type DaysMsVec = MutablePrimitiveArray<days_ms>;597/// A type definition [`MutablePrimitiveArray`] for [`months_days_ns`]598pub type MonthsDaysNsVec = MutablePrimitiveArray<months_days_ns>;599/// A type definition [`MutablePrimitiveArray`] for `f16`600pub type Float16Vec = MutablePrimitiveArray<f16>;601/// A type definition [`MutablePrimitiveArray`] for `f32`602pub type Float32Vec = MutablePrimitiveArray<f32>;603/// A type definition [`MutablePrimitiveArray`] for `f64`604pub type Float64Vec = MutablePrimitiveArray<f64>;605/// A type definition [`MutablePrimitiveArray`] for `u8`606pub type UInt8Vec = MutablePrimitiveArray<u8>;607/// A type definition [`MutablePrimitiveArray`] for `u16`608pub type UInt16Vec = MutablePrimitiveArray<u16>;609/// A type definition [`MutablePrimitiveArray`] for `u32`610pub type UInt32Vec = MutablePrimitiveArray<u32>;611/// A type definition [`MutablePrimitiveArray`] for `u64`612pub type UInt64Vec = MutablePrimitiveArray<u64>;613614impl<T: NativeType> Default for PrimitiveArray<T> {615fn default() -> Self {616PrimitiveArray::new(T::PRIMITIVE.into(), Default::default(), None)617}618}619620impl<T: NativeType> Bounded for PrimitiveArray<T> {621fn len(&self) -> usize {622self.values.len()623}624}625626impl<T: NativeType> NullCount for PrimitiveArray<T> {627fn null_count(&self) -> usize {628<Self as Array>::null_count(self)629}630}631632633