Path: blob/main/crates/polars-arrow/src/legacy/array/mod.rs
8415 views
#![allow(unsafe_op_in_unsafe_fn)]12use crate::array::{3Array, BooleanArray, FixedSizeListArray, ListArray, MutableBinaryViewArray, PrimitiveArray,4StructArray, ViewType, new_null_array,5};6use crate::bitmap::BitmapBuilder;7use crate::datatypes::ArrowDataType;8use crate::legacy::utils::CustomIterTools;9use crate::offset::Offsets;10use crate::types::NativeType;1112pub mod default_arrays;13#[cfg(feature = "dtype-array")]14pub mod fixed_size_list;15pub mod list;16pub mod null;17pub mod slice;18pub mod utf8;1920pub use slice::*;2122use crate::legacy::prelude::LargeListArray;2324macro_rules! iter_to_values {25($iterator:expr, $validity:expr, $offsets:expr, $length_so_far:expr) => {{26$iterator27.filter_map(|opt_iter| match opt_iter {28Some(x) => {29let it = x.into_iter();30$length_so_far += it.size_hint().0 as i64;31$validity.push(true);32$offsets.push($length_so_far);33Some(it)34},35None => {36$validity.push(false);37$offsets.push($length_so_far);38None39},40})41.flatten()42.collect()43}};44}4546pub trait ListFromIter {47/// Create a list-array from an iterator.48/// Used in group_by agg-list49///50/// # Safety51/// Will produce incorrect arrays if size hint is incorrect.52unsafe fn from_iter_primitive_trusted_len<T, P, I>(53iter: I,54dtype: ArrowDataType,55) -> ListArray<i64>56where57T: NativeType,58P: IntoIterator<Item = Option<T>>,59I: IntoIterator<Item = Option<P>>,60{61let iterator = iter.into_iter();62let (lower, _) = iterator.size_hint();6364let mut validity = BitmapBuilder::with_capacity(lower);65let mut offsets = Vec::<i64>::with_capacity(lower + 1);66let mut length_so_far = 0i64;67offsets.push(length_so_far);6869let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);7071// SAFETY:72// offsets are monotonically increasing73ListArray::new(74ListArray::<i64>::default_datatype(dtype.clone()),75Offsets::new_unchecked(offsets).into(),76Box::new(values.to(dtype)),77validity.into_opt_validity(),78)79}8081/// Create a list-array from an iterator.82/// Used in group_by agg-list83///84/// # Safety85/// Will produce incorrect arrays if size hint is incorrect.86unsafe fn from_iter_bool_trusted_len<I, P>(iter: I) -> ListArray<i64>87where88I: IntoIterator<Item = Option<P>>,89P: IntoIterator<Item = Option<bool>>,90{91let iterator = iter.into_iter();92let (lower, _) = iterator.size_hint();9394let mut validity = Vec::with_capacity(lower);95let mut offsets = Vec::<i64>::with_capacity(lower + 1);96let mut length_so_far = 0i64;97offsets.push(length_so_far);9899let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);100101// SAFETY:102// Offsets are monotonically increasing.103ListArray::new(104ListArray::<i64>::default_datatype(ArrowDataType::Boolean),105Offsets::new_unchecked(offsets).into(),106Box::new(values),107Some(validity.into()),108)109}110111/// # Safety112/// Will produce incorrect arrays if size hint is incorrect.113unsafe fn from_iter_binview_trusted_len<I, P, Ref, T: ViewType + ?Sized>(114iter: I,115n_elements: usize,116) -> ListArray<i64>117where118I: IntoIterator<Item = Option<P>>,119P: IntoIterator<Item = Option<Ref>>,120Ref: AsRef<T>,121{122let iterator = iter.into_iter();123let (lower, _) = iterator.size_hint();124125let mut validity = BitmapBuilder::with_capacity(lower);126let mut offsets = Vec::<i64>::with_capacity(lower + 1);127let mut length_so_far = 0i64;128offsets.push(length_so_far);129130let values: MutableBinaryViewArray<T> = iterator131.filter_map(|opt_iter| match opt_iter {132Some(x) => {133let it = x.into_iter();134length_so_far += it.size_hint().0 as i64;135validity.push(true);136offsets.push(length_so_far);137Some(it)138},139None => {140validity.push(false);141offsets.push(length_so_far);142None143},144})145.flatten()146.trust_my_length(n_elements)147.collect();148149// SAFETY:150// offsets are monotonically increasing151ListArray::new(152ListArray::<i64>::default_datatype(T::DATA_TYPE),153Offsets::new_unchecked(offsets).into(),154values.freeze().boxed(),155validity.into_opt_validity(),156)157}158159/// Create a list-array from an iterator.160/// Used in group_by agg-list161///162/// # Safety163/// Will produce incorrect arrays if size hint is incorrect.164unsafe fn from_iter_utf8_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>165where166I: IntoIterator<Item = Option<P>>,167P: IntoIterator<Item = Option<Ref>>,168Ref: AsRef<str>,169{170Self::from_iter_binview_trusted_len(iter, n_elements)171}172173/// Create a list-array from an iterator.174/// Used in group_by agg-list175///176/// # Safety177/// Will produce incorrect arrays if size hint is incorrect.178unsafe fn from_iter_binary_trusted_len<I, P, Ref>(iter: I, n_elements: usize) -> ListArray<i64>179where180I: IntoIterator<Item = Option<P>>,181P: IntoIterator<Item = Option<Ref>>,182Ref: AsRef<[u8]>,183{184Self::from_iter_binview_trusted_len(iter, n_elements)185}186}187impl ListFromIter for ListArray<i64> {}188189fn is_nested_null(dtype: &ArrowDataType) -> bool {190match dtype {191ArrowDataType::Null => true,192ArrowDataType::LargeList(field) => is_nested_null(field.dtype()),193ArrowDataType::FixedSizeList(field, _) => is_nested_null(field.dtype()),194ArrowDataType::Struct(fields) => fields.iter().all(|field| is_nested_null(field.dtype())),195_ => false,196}197}198199/// Cast null arrays to inner type and ensure that all offsets remain correct200pub fn convert_inner_type(array: &dyn Array, dtype: &ArrowDataType) -> Box<dyn Array> {201match dtype {202ArrowDataType::LargeList(field) => {203let array = array.as_any().downcast_ref::<LargeListArray>().unwrap();204let inner = array.values();205let new_values = convert_inner_type(inner.as_ref(), field.dtype());206let dtype = LargeListArray::default_datatype(new_values.dtype().clone());207LargeListArray::new(208dtype,209array.offsets().clone(),210new_values,211array.validity().cloned(),212)213.boxed()214},215ArrowDataType::FixedSizeList(field, width) => {216let width = *width;217218let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();219let inner = array.values();220let length = if width == array.size() {221array.len()222} else {223assert!(!array.values().is_empty() || width != 0);224if width == 0 {2250226} else {227array.values().len() / width228}229};230let new_values = convert_inner_type(inner.as_ref(), field.dtype());231let dtype = FixedSizeListArray::default_datatype(new_values.dtype().clone(), width);232FixedSizeListArray::new(dtype, length, new_values, array.validity().cloned()).boxed()233},234ArrowDataType::Struct(fields) => {235let array = array.as_any().downcast_ref::<StructArray>().unwrap();236let inner = array.values();237let new_values = inner238.iter()239.zip(fields)240.map(|(arr, field)| convert_inner_type(arr.as_ref(), field.dtype()))241.collect::<Vec<_>>();242StructArray::new(243dtype.clone(),244array.len(),245new_values,246array.validity().cloned(),247)248.boxed()249},250_ => new_null_array(dtype.clone(), array.len()),251}252}253254255