Path: blob/main/crates/polars-arrow/src/array/list/mod.rs
8398 views
use super::specification::try_check_offsets_bounds;1use super::{Array, Splitable, new_empty_array};2use crate::bitmap::Bitmap;3use crate::datatypes::{ArrowDataType, Field};4use crate::offset::{Offset, Offsets, OffsetsBuffer};56mod builder;7pub use builder::*;8mod ffi;9pub(super) mod fmt;10mod iterator;11pub use iterator::*;12mod mutable;13pub use mutable::*;14use polars_error::{PolarsResult, polars_bail};15use polars_utils::pl_str::PlSmallStr;16#[cfg(feature = "proptest")]17pub mod proptest;1819/// Name used for the values array within List/FixedSizeList arrays.20pub const LIST_VALUES_NAME: PlSmallStr = PlSmallStr::from_static("item");2122/// An [`Array`] semantically equivalent to `Vec<Option<Vec<Option<T>>>>` with Arrow's in-memory.23#[derive(Clone)]24pub struct ListArray<O: Offset> {25dtype: ArrowDataType,26offsets: OffsetsBuffer<O>,27values: Box<dyn Array>,28validity: Option<Bitmap>,29}3031impl<O: Offset> ListArray<O> {32/// Creates a new [`ListArray`].33///34/// # Errors35/// This function returns an error iff:36/// * `offsets.last()` is greater than `values.len()`.37/// * the validity's length is not equal to `offsets.len_proxy()`.38/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].39/// * The `dtype`'s inner field's data type is not equal to `values.dtype`.40/// # Implementation41/// This function is `O(1)`42pub fn try_new(43dtype: ArrowDataType,44offsets: OffsetsBuffer<O>,45values: Box<dyn Array>,46validity: Option<Bitmap>,47) -> PolarsResult<Self> {48try_check_offsets_bounds(&offsets, values.len())?;4950if validity51.as_ref()52.is_some_and(|validity| validity.len() != offsets.len_proxy())53{54polars_bail!(ComputeError: "validity mask length must match the number of values")55}5657let child_dtype = Self::try_get_child(&dtype)?.dtype();58let values_dtype = values.dtype();59if child_dtype != values_dtype {60polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");61}6263Ok(Self {64dtype,65offsets,66values,67validity,68})69}7071/// Creates a new [`ListArray`].72///73/// # Panics74/// This function panics iff:75/// * `offsets.last()` is greater than `values.len()`.76/// * the validity's length is not equal to `offsets.len_proxy()`.77/// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].78/// * The `dtype`'s inner field's data type is not equal to `values.dtype`.79/// # Implementation80/// This function is `O(1)`81pub fn new(82dtype: ArrowDataType,83offsets: OffsetsBuffer<O>,84values: Box<dyn Array>,85validity: Option<Bitmap>,86) -> Self {87Self::try_new(dtype, offsets, values, validity).unwrap()88}8990/// Returns a new empty [`ListArray`].91pub fn new_empty(dtype: ArrowDataType) -> Self {92let values = new_empty_array(Self::get_child_type(&dtype).clone());93Self::new(dtype, OffsetsBuffer::default(), values, None)94}9596/// Returns a new null [`ListArray`].97#[inline]98pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {99let child = Self::get_child_type(&dtype).clone();100Self::new(101dtype,102Offsets::new_zeroed(length).into(),103new_empty_array(child),104Some(Bitmap::new_zeroed(length)),105)106}107108pub fn into_inner(109self,110) -> (111ArrowDataType,112Box<dyn Array>,113OffsetsBuffer<O>,114Option<Bitmap>,115) {116(self.dtype, self.values, self.offsets, self.validity)117}118}119120impl<O: Offset> ListArray<O> {121/// Slices this [`ListArray`].122/// # Panics123/// panics iff `offset + length > self.len()`124pub fn slice(&mut self, offset: usize, length: usize) {125assert!(126offset + length <= self.len(),127"the offset of the new Buffer cannot exceed the existing length"128);129unsafe { self.slice_unchecked(offset, length) }130}131132/// Slices this [`ListArray`].133///134/// # Safety135/// The caller must ensure that `offset + length < self.len()`.136pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {137self.validity = self138.validity139.take()140.map(|bitmap| bitmap.sliced_unchecked(offset, length))141.filter(|bitmap| bitmap.unset_bits() > 0);142self.offsets.slice_unchecked(offset, length + 1);143}144145impl_sliced!();146impl_mut_validity!();147impl_into_array!();148}149150// Accessors151impl<O: Offset> ListArray<O> {152/// Returns the length of this array153#[inline]154pub fn len(&self) -> usize {155self.offsets.len_proxy()156}157158/// Returns the element at index `i`159/// # Panic160/// Panics iff `i >= self.len()`161#[inline]162pub fn value(&self, i: usize) -> Box<dyn Array> {163assert!(i < self.len());164// SAFETY: invariant of this function165unsafe { self.value_unchecked(i) }166}167168/// Returns the element at index `i` as &str169///170/// # Safety171/// Assumes that the `i < self.len`.172#[inline]173pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {174// SAFETY: the invariant of the function175let (start, end) = self.offsets.start_end_unchecked(i);176let length = end - start;177178// SAFETY: the invariant of the struct179self.values.sliced_unchecked(start, length)180}181182/// The optional validity.183#[inline]184pub fn validity(&self) -> Option<&Bitmap> {185self.validity.as_ref()186}187188/// The offsets [`Buffer`].189#[inline]190pub fn offsets(&self) -> &OffsetsBuffer<O> {191&self.offsets192}193194/// The values.195#[inline]196pub fn values(&self) -> &Box<dyn Array> {197&self.values198}199}200201impl<O: Offset> ListArray<O> {202/// Returns a default [`ArrowDataType`]: inner field is named "item" and is nullable203pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {204let field = Box::new(Field::new(LIST_VALUES_NAME, dtype, true));205if O::IS_LARGE {206ArrowDataType::LargeList(field)207} else {208ArrowDataType::List(field)209}210}211212/// Returns a the inner [`Field`]213/// # Panics214/// Panics iff the logical type is not consistent with this struct.215pub fn get_child_field(dtype: &ArrowDataType) -> &Field {216Self::try_get_child(dtype).unwrap()217}218219/// Returns a the inner [`Field`]220/// # Errors221/// Panics iff the logical type is not consistent with this struct.222pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {223if O::IS_LARGE {224match dtype.to_storage() {225ArrowDataType::LargeList(child) => Ok(child.as_ref()),226_ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),227}228} else {229match dtype.to_storage() {230ArrowDataType::List(child) => Ok(child.as_ref()),231_ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),232}233}234}235236/// Returns a the inner [`ArrowDataType`]237/// # Panics238/// Panics iff the logical type is not consistent with this struct.239pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {240Self::get_child_field(dtype).dtype()241}242}243244impl<O: Offset> Array for ListArray<O> {245impl_common_array!();246247fn validity(&self) -> Option<&Bitmap> {248self.validity.as_ref()249}250251#[inline]252fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {253Box::new(self.clone().with_validity(validity))254}255}256257impl<O: Offset> Splitable for ListArray<O> {258fn check_bound(&self, offset: usize) -> bool {259offset <= self.len()260}261262unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {263let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };264let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };265266(267Self {268dtype: self.dtype.clone(),269offsets: lhs_offsets,270validity: lhs_validity,271values: self.values.clone(),272},273Self {274dtype: self.dtype.clone(),275offsets: rhs_offsets,276validity: rhs_validity,277values: self.values.clone(),278},279)280}281}282283284