Path: blob/main/crates/polars-arrow/src/compute/aggregate/memory.rs
6939 views
use crate::array::*;1use crate::bitmap::Bitmap;2use crate::datatypes::PhysicalType;3pub use crate::types::PrimitiveType;4use crate::{match_integer_type, with_match_primitive_type_full};5fn validity_size(validity: Option<&Bitmap>) -> usize {6validity.as_ref().map(|b| b.as_slice().0.len()).unwrap_or(0)7}89macro_rules! dyn_binary {10($array:expr, $ty:ty, $o:ty) => {{11let array = $array.as_any().downcast_ref::<$ty>().unwrap();12let offsets = array.offsets().buffer();1314// in case of Binary/Utf8/List the offsets are sliced,15// not the values buffer16let values_start = offsets[0] as usize;17let values_end = offsets[offsets.len() - 1] as usize;1819values_end - values_start20+ offsets.len() * size_of::<$o>()21+ validity_size(array.validity())22}};23}2425fn binview_size<T: ViewType + ?Sized>(array: &BinaryViewArrayGeneric<T>) -> usize {26// We choose the optimal usage as data can be shared across buffers.27// If we would sum all buffers we overestimate memory usage and trigger OOC when not needed.28array.total_bytes_len()29}3031/// Returns the total (heap) allocated size of the array in bytes.32/// # Implementation33/// This estimation is the sum of the size of its buffers, validity, including nested arrays.34/// Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the35/// sum of the sizes computed from this function. In particular, [`StructArray`]'s size is an upper bound.36///37/// When an array is sliced, its allocated size remains constant because the buffer unchanged.38/// However, this function will yield a smaller number. This is because this function returns39/// the visible size of the buffer, not its total capacity.40///41/// FFI buffers are included in this estimation.42pub fn estimated_bytes_size(array: &dyn Array) -> usize {43use PhysicalType::*;44match array.dtype().to_physical_type() {45Null => 0,46Boolean => {47let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();48array.values().as_slice().0.len() + validity_size(array.validity())49},50Primitive(PrimitiveType::DaysMs) => {51let array = array.as_any().downcast_ref::<DaysMsArray>().unwrap();52array.values().len() * size_of::<i32>() * 2 + validity_size(array.validity())53},54Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {55let array = array56.as_any()57.downcast_ref::<PrimitiveArray<$T>>()58.unwrap();5960array.values().len() * size_of::<$T>() + validity_size(array.validity())61}),62Binary => dyn_binary!(array, BinaryArray<i32>, i32),63FixedSizeBinary => {64let array = array65.as_any()66.downcast_ref::<FixedSizeBinaryArray>()67.unwrap();68array.values().len() + validity_size(array.validity())69},70LargeBinary => dyn_binary!(array, BinaryArray<i64>, i64),71Utf8 => dyn_binary!(array, Utf8Array<i32>, i32),72LargeUtf8 => dyn_binary!(array, Utf8Array<i64>, i64),73List => {74let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();75estimated_bytes_size(array.values().as_ref())76+ array.offsets().len_proxy() * size_of::<i32>()77+ validity_size(array.validity())78},79FixedSizeList => {80let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();81estimated_bytes_size(array.values().as_ref()) + validity_size(array.validity())82},83LargeList => {84let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();85estimated_bytes_size(array.values().as_ref())86+ array.offsets().len_proxy() * size_of::<i64>()87+ validity_size(array.validity())88},89Struct => {90let array = array.as_any().downcast_ref::<StructArray>().unwrap();91array92.values()93.iter()94.map(|x| x.as_ref())95.map(estimated_bytes_size)96.sum::<usize>()97+ validity_size(array.validity())98},99Union => {100let array = array.as_any().downcast_ref::<UnionArray>().unwrap();101let types = array.types().len() * size_of::<i8>();102let offsets = array103.offsets()104.as_ref()105.map(|x| x.len() * size_of::<i32>())106.unwrap_or_default();107let fields = array108.fields()109.iter()110.map(|x| x.as_ref())111.map(estimated_bytes_size)112.sum::<usize>();113types + offsets + fields114},115Dictionary(key_type) => match_integer_type!(key_type, |$T| {116let array = array117.as_any()118.downcast_ref::<DictionaryArray<$T>>()119.unwrap();120estimated_bytes_size(array.keys()) + estimated_bytes_size(array.values().as_ref())121}),122Utf8View => binview_size::<str>(array.as_any().downcast_ref().unwrap()),123BinaryView => binview_size::<[u8]>(array.as_any().downcast_ref().unwrap()),124Map => {125let array = array.as_any().downcast_ref::<MapArray>().unwrap();126let offsets = array.offsets().len_proxy() * size_of::<i32>();127offsets + estimated_bytes_size(array.field().as_ref()) + validity_size(array.validity())128},129}130}131132133