// Path: blob/main/crates/polars-arrow/src/legacy/array/mod.rs
// 6939 views
#![allow(unsafe_op_in_unsafe_fn)]1use crate::array::{2Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray, PrimitiveArray,3StructArray, ViewType, new_null_array,4};5use crate::bitmap::BitmapBuilder;6use crate::datatypes::ArrowDataType;7use crate::legacy::utils::CustomIterTools;8use crate::offset::Offsets;9use crate::types::NativeType;1011pub mod default_arrays;12#[cfg(feature = "dtype-array")]13pub mod fixed_size_list;14pub mod list;15pub mod null;16pub mod slice;17pub mod utf8;1819pub use slice::*;2021use crate::legacy::prelude::LargeListArray;2223macro_rules! iter_to_values {24($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{25$iterator26.filter_map(|opt_iter| match opt_iter {27Some(x) => {28let it = x.into_iter();29$length_so_far += it.size_hint().0 as i64;30$validity.push(true);31$offsets.push($length_so_far);32Some(it)33},34None => {35$validity.push(false);36$offsets.push($length_so_far);37None38},39})40.flatten()41.collect()42}};43}4445pub trait ListFromIter {46/// Create a list-array from an iterator.47/// Used in group_by agg-list48///49/// # Safety50/// Will produce incorrect arrays if size hint is incorrect.51unsafe fn from_iter_primitive_trusted_len<T, P, I>(52iter: I,53dtype: ArrowDataType,54) -> ListArray<i64>55where56T: NativeType,57P: IntoIterator<Item = Option<T>>,58I: IntoIterator<Item = Option<P>>,59{60let iterator = iter.into_iter();61let (lower, _) = iterator.size_hint();6263let mut validity = BitmapBuilder::with_capacity(lower);64let mut offsets = Vec::<i64>::with_capacity(lower + 1);65let mut length_so_far = 0i64;66offsets.push(length_so_far);6768let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);6970// SAFETY:71// offsets are monotonically increasing72ListArray::new(73ListArray::<i64>::default_datatype(dtype.clone()),74Offsets::new_unchecked(offsets).into(),75Box::new(values.to(dtype)),76validity.into_opt_validity(),77)78}7980/// Create a list-array from 
an iterator.81/// Used in group_by agg-list82///83/// # Safety84/// Will produce incorrect arrays if size hint is incorrect.85unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>86where87I: IntoIterator<Item = Option<P>>,88P: IntoIterator<Item = Option<bool>>,89{90let iterator = iter.into_iter();91let (lower, _) = iterator.size_hint();9293let mut validity = Vec::with_capacity(lower);94let mut offsets = Vec::<i64>::with_capacity(lower + 1);95let mut length_so_far = 0i64;96offsets.push(length_so_far);9798let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);99100// SAFETY:101// Offsets are monotonically increasing.102ListArray::new(103ListArray::<i64>::default_datatype(ArrowDataType::Boolean),104Offsets::new_unchecked(offsets).into(),105Box::new(values),106Some(validity.into()),107)108}109110/// # Safety111/// Will produce incorrect arrays if size hint is incorrect.112unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(113iter: I,114n_elements: usize,115) -> ListArray<i64>116where117I: IntoIterator<Item = Option<P>>,118P: IntoIterator<Item = Option<Ref>>,119Ref: AsRef<T>,120{121let iterator = iter.into_iter();122let (lower, _) = iterator.size_hint();123124let mut validity = BitmapBuilder::with_capacity(lower);125let mut offsets = Vec::<i64>::with_capacity(lower + 1);126let mut length_so_far = 0i64;127offsets.push(length_so_far);128129let values: MutableBinaryViewArray<T> = iterator130.filter_map(|opt_iter| match opt_iter {131Some(x) => {132let it = x.into_iter();133length_so_far += it.size_hint().0 as i64;134validity.push(true);135offsets.push(length_so_far);136Some(it)137},138None => {139validity.push(false);140offsets.push(length_so_far);141None142},143})144.flatten()145.trust_my_length(n_elements)146.collect();147148// SAFETY:149// offsets are monotonically 
increasing150ListArray::new(151ListArray::<i64>::default_datatype(T::DATA_TYPE),152Offsets::new_unchecked(offsets).into(),153values.freeze().boxed(),154validity.into_opt_validity(),155)156}157158/// Create a list-array from an iterator.159/// Used in group_by agg-list160///161/// # Safety162/// Will produce incorrect arrays if size hint is incorrect.163unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>164where165I: IntoIterator<Item = Option<P>>,166P: IntoIterator<Item = Option<Ref>>,167Ref: AsRef<str>,168{169Self::from_iter_binview_trusted_len(iter, n_elements)170}171172/// Create a list-array from an iterator.173/// Used in group_by agg-list174///175/// # Safety176/// Will produce incorrect arrays if size hint is incorrect.177unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>178where179I: IntoIterator<Item = Option<P>>,180P: IntoIterator<Item = Option<Ref>>,181Ref: AsRef<[u8]>,182{183Self::from_iter_binview_trusted_len(iter, n_elements)184}185}186impl ListFromIter for ListArray<i64> {}187188fn is_nested_null(dtype: &ArrowDataType) -> bool {189match dtype {190ArrowDataType::Null => true,191ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),192ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),193ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),194_ => false,195}196}197198/// Cast null arrays to inner type and ensure that all offsets remain correct199pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {200match dtype {201ArrowDataType::LargeList(field) => {202let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();203let inner = array.values();204let new_values = convert_inner_type(inner.as_ref(), field.dtype());205let dtype = 
LargeListArray::default_datatype(new_values.dtype().clone());206LargeListArray::new(207dtype,208array.offsets().clone(),209new_values,210array.validity().cloned(),211)212.boxed()213},214ArrowDataType::FixedSizeList(field, width) => {215let width = *width;216217let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();218let inner = array.values();219let length = if width == array.size() {220array.len()221} else {222assert!(!array.values().is_empty() || width != 0);223if width == 0 {2240225} else {226array.values().len() / width227}228};229let new_values = convert_inner_type(inner.as_ref(), field.dtype());230let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);231FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()232},233ArrowDataType::Struct(fields) => {234let array = array.as_any().downcast_ref::<StructArray>().unwrap();235let inner = array.values();236let new_values = inner237.iter()238.zip(fields)239.map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))240.collect::<Vec<_>>();241StructArray::new(242dtype.clone(),243array.len(),244new_values,245array.validity().cloned(),246)247.boxed()248},249_ => new_null_array(dtype.clone(), array.len()),250}251}252253254