Path: blob/main/crates/polars-arrow/src/array/struct_/mod.rs
6939 views
use super::{Array, Splitable, new_empty_array, new_null_array};1use crate::bitmap::Bitmap;2use crate::datatypes::{ArrowDataType, Field};34mod builder;5pub use builder::*;6mod ffi;7pub(super) mod fmt;8mod iterator;9use polars_error::{PolarsResult, polars_bail, polars_ensure};10#[cfg(feature = "proptest")]11pub mod proptest;1213/// A [`StructArray`] is a nested [`Array`] with an optional validity representing14/// multiple [`Array`] with the same number of rows.15/// # Example16/// ```17/// use polars_arrow::array::*;18/// use polars_arrow::datatypes::*;19/// let boolean = BooleanArray::from_slice(&[false, false, true, true]).boxed();20/// let int = Int32Array::from_slice(&[42, 28, 19, 31]).boxed();21///22/// let fields = vec![23/// Field::new("b".into(), ArrowDataType::Boolean, false),24/// Field::new("c".into(), ArrowDataType::Int32, false),25/// ];26///27/// let array = StructArray::new(ArrowDataType::Struct(fields), 4, vec![boolean, int], None);28/// ```29#[derive(Clone)]30pub struct StructArray {31dtype: ArrowDataType,32// invariant: each array has the same length33values: Vec<Box<dyn Array>>,34// invariant: for each v in values: length == v.len()35length: usize,36validity: Option<Bitmap>,37}3839impl StructArray {40/// Returns a new [`StructArray`].41/// # Errors42/// This function errors iff:43/// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Struct`].44/// * the children of `dtype` are empty45/// * the values's len is different from children's length46/// * any of the values's data type is different from its corresponding children' data type47/// * any element of values has a different length than the first element48/// * the validity's length is not equal to the length of the first element49pub fn try_new(50dtype: ArrowDataType,51length: usize,52values: Vec<Box<dyn Array>>,53validity: Option<Bitmap>,54) -> PolarsResult<Self> {55let fields = Self::try_get_fields(&dtype)?;5657polars_ensure!(58fields.len() == values.len(),59ComputeError:60"a StructArray must have a number of fields in its DataType equal to the number of child values"61);6263fields64.iter().map(|a| &a.dtype)65.zip(values.iter().map(|a| a.dtype()))66.enumerate()67.try_for_each(|(index, (dtype, child))| {68if dtype != child {69polars_bail!(ComputeError:70"The children DataTypes of a StructArray must equal the children data types.71However, the field {index} has data type {dtype:?} but the value has data type {child:?}"72)73} else {74Ok(())75}76})?;7778values79.iter()80.map(|f| f.len())81.enumerate()82.try_for_each(|(index, f_length)| {83if f_length != length {84polars_bail!(ComputeError: "The children must have the given number of values.85However, the values at index {index} have a length of {f_length}, which is different from given length {length}.")86} else {87Ok(())88}89})?;9091if validity92.as_ref()93.is_some_and(|validity| validity.len() != length)94{95polars_bail!(ComputeError:"The validity length of a StructArray must match its number of elements")96}9798Ok(Self {99dtype,100length,101values,102validity,103})104}105106/// Returns a new [`StructArray`]107/// # Panics108/// This function panics iff:109/// * `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Struct`].110/// * the children of `dtype` are empty111/// * the values's len is different from children's length112/// * any of the values's data type is different from its corresponding children' data type113/// * any element of values has a different length than the first element114/// * the validity's length is not equal to the length of the first element115pub fn new(116dtype: ArrowDataType,117length: usize,118values: Vec<Box<dyn Array>>,119validity: Option<Bitmap>,120) -> Self {121Self::try_new(dtype, length, values, validity).unwrap()122}123124/// Creates an empty [`StructArray`].125pub fn new_empty(dtype: ArrowDataType) -> Self {126if let ArrowDataType::Struct(fields) = &dtype.to_logical_type() {127let values = fields128.iter()129.map(|field| new_empty_array(field.dtype().clone()))130.collect();131Self::new(dtype, 0, values, None)132} else {133panic!("StructArray must be initialized with DataType::Struct");134}135}136137/// Creates a null [`StructArray`] of length `length`.138pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {139if let ArrowDataType::Struct(fields) = &dtype {140let values = fields141.iter()142.map(|field| new_null_array(field.dtype().clone(), length))143.collect();144Self::new(dtype, length, values, Some(Bitmap::new_zeroed(length)))145} else {146panic!("StructArray must be initialized with DataType::Struct");147}148}149}150151// must use152impl StructArray {153/// Deconstructs the [`StructArray`] into its individual components.154#[must_use]155pub fn into_data(self) -> (Vec<Field>, usize, Vec<Box<dyn Array>>, Option<Bitmap>) {156let Self {157dtype,158length,159values,160validity,161} = self;162let fields = if let ArrowDataType::Struct(fields) = dtype {163fields164} else {165unreachable!()166};167(fields, length, values, validity)168}169170/// Slices this [`StructArray`].171/// # Panics172/// panics iff `offset + length > self.len()`173/// # Implementation174/// This operation is `O(F)` where `F` is the number of fields.175pub fn slice(&mut self, offset: usize, length: usize) {176assert!(177offset + length <= self.len(),178"offset + length may not exceed length of array"179);180unsafe { self.slice_unchecked(offset, length) }181}182183/// Slices this [`StructArray`].184/// # Implementation185/// This operation is `O(F)` where `F` is the number of fields.186///187/// # Safety188/// The caller must ensure that `offset + length <= self.len()`.189pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {190self.validity = self191.validity192.take()193.map(|bitmap| bitmap.sliced_unchecked(offset, length))194.filter(|bitmap| bitmap.unset_bits() > 0);195self.values196.iter_mut()197.for_each(|x| x.slice_unchecked(offset, length));198self.length = length;199}200201impl_sliced!();202203impl_mut_validity!();204205impl_into_array!();206}207208// Accessors209impl StructArray {210#[inline]211pub fn len(&self) -> usize {212if cfg!(debug_assertions) {213for arr in self.values.iter() {214assert_eq!(215arr.len(),216self.length,217"StructArray invariant: each array has same length"218);219}220}221222self.length223}224225/// The optional validity.226#[inline]227pub fn validity(&self) -> Option<&Bitmap> {228self.validity.as_ref()229}230231/// Returns the values of this [`StructArray`].232pub fn values(&self) -> &[Box<dyn Array>] {233&self.values234}235236/// Returns the fields of this [`StructArray`].237pub fn fields(&self) -> &[Field] {238let fields = Self::get_fields(&self.dtype);239debug_assert_eq!(self.values().len(), fields.len());240fields241}242}243244impl StructArray {245/// Returns the fields the `DataType::Struct`.246pub(crate) fn try_get_fields(dtype: &ArrowDataType) -> PolarsResult<&[Field]> {247match dtype.to_logical_type() {248ArrowDataType::Struct(fields) => Ok(fields),249_ => {250polars_bail!(ComputeError: "Struct array must be created with a DataType whose physical type is Struct")251},252}253}254255/// Returns the fields the `DataType::Struct`.256pub fn get_fields(dtype: &ArrowDataType) -> &[Field] {257Self::try_get_fields(dtype).unwrap()258}259}260261impl Array for StructArray {262impl_common_array!();263264fn validity(&self) -> Option<&Bitmap> {265self.validity.as_ref()266}267268#[inline]269fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {270Box::new(self.clone().with_validity(validity))271}272}273274impl Splitable for StructArray {275fn check_bound(&self, offset: usize) -> bool {276offset <= self.len()277}278279unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {280let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };281282let mut lhs_values = Vec::with_capacity(self.values.len());283let mut rhs_values = Vec::with_capacity(self.values.len());284285for v in self.values.iter() {286let (lhs, rhs) = unsafe { v.split_at_boxed_unchecked(offset) };287lhs_values.push(lhs);288rhs_values.push(rhs);289}290291(292Self {293dtype: self.dtype.clone(),294length: offset,295values: lhs_values,296validity: lhs_validity,297},298Self {299dtype: self.dtype.clone(),300length: self.length - offset,301values: rhs_values,302validity: rhs_validity,303},304)305}306}307308309