Path: blob/main/crates/polars-arrow/src/compute/aggregate/memory.rs
8422 views
use crate::array::*;1use crate::bitmap::Bitmap;2use crate::datatypes::PhysicalType;3use crate::types::Index;4pub use crate::types::PrimitiveType;5use crate::{match_integer_type, with_match_primitive_type_full};6fn validity_size(validity: Option<&Bitmap>) -> usize {7validity.as_ref().map(|b| b.as_slice().0.len()).unwrap_or(0)8}910macro_rules! dyn_binary {11($array:expr, $ty:ty, $o:ty) => {{12let array = $array.as_any().downcast_ref::<$ty>().unwrap();13let offsets = array.offsets().buffer();1415// in case of Binary/Utf8/List the offsets are sliced,16// not the values buffer17let values_start = offsets[0] as usize;18let values_end = offsets[offsets.len() - 1] as usize;1920values_end - values_start21+ offsets.len() * size_of::<$o>()22+ validity_size(array.validity())23}};24}2526fn binview_size<T: ViewType + ?Sized>(array: &BinaryViewArrayGeneric<T>) -> usize {27// We choose the optimal usage as data can be shared across buffers.28// If we would sum all buffers we overestimate memory usage and trigger OOC when not needed.29array.total_bytes_len()30}3132/// Returns the total (heap) allocated size of the array in bytes.33/// # Implementation34/// This estimation is the sum of the size of its buffers, validity, including nested arrays.35/// Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the36/// sum of the sizes computed from this function. In particular, [`StructArray`]'s size is an upper bound.37///38/// When an array is sliced, its allocated size remains constant because the buffer unchanged.39/// However, this function will yield a smaller number. This is because this function returns40/// the visible size of the buffer, not its total capacity.41///42/// FFI buffers are included in this estimation.43pub fn estimated_bytes_size(array: &dyn Array) -> usize {44use PhysicalType::*;45match array.dtype().to_physical_type() {46Null => 0,47Boolean => {48let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();49array.values().as_slice().0.len() + validity_size(array.validity())50},51Primitive(PrimitiveType::DaysMs) => {52let array = array.as_any().downcast_ref::<DaysMsArray>().unwrap();53array.values().len() * size_of::<i32>() * 2 + validity_size(array.validity())54},55Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {56let array = array57.as_any()58.downcast_ref::<PrimitiveArray<$T>>()59.unwrap();6061array.values().len() * size_of::<$T>() + validity_size(array.validity())62}),63Binary => dyn_binary!(array, BinaryArray<i32>, i32),64FixedSizeBinary => {65let array = array66.as_any()67.downcast_ref::<FixedSizeBinaryArray>()68.unwrap();69array.values().len() + validity_size(array.validity())70},71LargeBinary => dyn_binary!(array, BinaryArray<i64>, i64),72Utf8 => dyn_binary!(array, Utf8Array<i32>, i32),73LargeUtf8 => dyn_binary!(array, Utf8Array<i64>, i64),74List => {75let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();76estimated_bytes_size(77array78.values()79.sliced(80array.offsets().first().to_usize(),81array.offsets().range().to_usize(),82)83.as_ref(),84) + array.offsets().len_proxy() * size_of::<i32>()85+ validity_size(array.validity())86},87FixedSizeList => {88let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();89estimated_bytes_size(array.values().as_ref()) + validity_size(array.validity())90},91LargeList => {92let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();93estimated_bytes_size(94array95.values()96.sliced(97array.offsets().first().to_usize(),98array.offsets().range().to_usize(),99)100.as_ref(),101) + array.offsets().len_proxy() * size_of::<i64>()102+ validity_size(array.validity())103},104Struct => {105let array = array.as_any().downcast_ref::<StructArray>().unwrap();106array107.values()108.iter()109.map(|x| x.as_ref())110.map(estimated_bytes_size)111.sum::<usize>()112+ validity_size(array.validity())113},114Union => {115let array = array.as_any().downcast_ref::<UnionArray>().unwrap();116let types = array.types().len() * size_of::<i8>();117let offsets = array118.offsets()119.as_ref()120.map(|x| x.len() * size_of::<i32>())121.unwrap_or_default();122let fields = array123.fields()124.iter()125.map(|x| x.as_ref())126.map(estimated_bytes_size)127.sum::<usize>();128types + offsets + fields129},130Dictionary(key_type) => match_integer_type!(key_type, |$T| {131let array = array132.as_any()133.downcast_ref::<DictionaryArray<$T>>()134.unwrap();135estimated_bytes_size(array.keys()) + estimated_bytes_size(array.values().as_ref())136}),137Utf8View => binview_size::<str>(array.as_any().downcast_ref().unwrap()),138BinaryView => binview_size::<[u8]>(array.as_any().downcast_ref().unwrap()),139Map => {140let array = array.as_any().downcast_ref::<MapArray>().unwrap();141let offsets = array.offsets().len_proxy() * size_of::<i32>();142offsets + estimated_bytes_size(array.field().as_ref()) + validity_size(array.validity())143},144}145}146147148