Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/compute/aggregate/memory.rs
6939 views
1
use crate::array::*;
2
use crate::bitmap::Bitmap;
3
use crate::datatypes::PhysicalType;
4
pub use crate::types::PrimitiveType;
5
use crate::{match_integer_type, with_match_primitive_type_full};
6
fn validity_size(validity: Option<&Bitmap>) -> usize {
7
validity.as_ref().map(|b| b.as_slice().0.len()).unwrap_or(0)
8
}
9
10
/// Estimates the size of an offset-based array (`$ty`, with offsets of type `$o`).
///
/// `$array` is downcast to `$ty` (panicking if the downcast fails); the result is
/// the visible span of the values, plus the offsets, plus the validity.
macro_rules! dyn_binary {
    ($array:expr, $ty:ty, $o:ty) => {{
        let typed = $array.as_any().downcast_ref::<$ty>().unwrap();
        let offs = typed.offsets().buffer();

        // For Binary/Utf8/List it is the offsets that get sliced, not the values
        // buffer, so the visible values span runs from the first to the last offset.
        let first = offs[0] as usize;
        let last = offs[offs.len() - 1] as usize;
        let values_bytes = last - first;
        let offsets_bytes = offs.len() * size_of::<$o>();

        values_bytes + offsets_bytes + validity_size(typed.validity())
    }};
}
25
26
/// Estimates the size of a view array as the value of its `total_bytes_len()`.
///
/// Data can be shared across buffers, so summing every buffer would overestimate
/// memory usage and trigger OOC when it is not needed. The optimal usage is
/// reported instead.
fn binview_size<T>(array: &BinaryViewArrayGeneric<T>) -> usize
where
    T: ViewType + ?Sized,
{
    array.total_bytes_len()
}
31
32
/// Returns the total (heap) allocated size of the array in bytes.
33
/// # Implementation
34
/// This estimation is the sum of the size of its buffers, validity, including nested arrays.
35
/// Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
36
/// sum of the sizes computed from this function. In particular, [`StructArray`]'s size is an upper bound.
37
///
38
/// When an array is sliced, its allocated size remains constant because the buffer unchanged.
39
/// However, this function will yield a smaller number. This is because this function returns
40
/// the visible size of the buffer, not its total capacity.
41
///
42
/// FFI buffers are included in this estimation.
43
pub fn estimated_bytes_size(array: &dyn Array) -> usize {
44
use PhysicalType::*;
45
match array.dtype().to_physical_type() {
46
Null => 0,
47
Boolean => {
48
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
49
array.values().as_slice().0.len() + validity_size(array.validity())
50
},
51
Primitive(PrimitiveType::DaysMs) => {
52
let array = array.as_any().downcast_ref::<DaysMsArray>().unwrap();
53
array.values().len() * size_of::<i32>() * 2 + validity_size(array.validity())
54
},
55
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
56
let array = array
57
.as_any()
58
.downcast_ref::<PrimitiveArray<$T>>()
59
.unwrap();
60
61
array.values().len() * size_of::<$T>() + validity_size(array.validity())
62
}),
63
Binary => dyn_binary!(array, BinaryArray<i32>, i32),
64
FixedSizeBinary => {
65
let array = array
66
.as_any()
67
.downcast_ref::<FixedSizeBinaryArray>()
68
.unwrap();
69
array.values().len() + validity_size(array.validity())
70
},
71
LargeBinary => dyn_binary!(array, BinaryArray<i64>, i64),
72
Utf8 => dyn_binary!(array, Utf8Array<i32>, i32),
73
LargeUtf8 => dyn_binary!(array, Utf8Array<i64>, i64),
74
List => {
75
let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();
76
estimated_bytes_size(array.values().as_ref())
77
+ array.offsets().len_proxy() * size_of::<i32>()
78
+ validity_size(array.validity())
79
},
80
FixedSizeList => {
81
let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
82
estimated_bytes_size(array.values().as_ref()) + validity_size(array.validity())
83
},
84
LargeList => {
85
let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();
86
estimated_bytes_size(array.values().as_ref())
87
+ array.offsets().len_proxy() * size_of::<i64>()
88
+ validity_size(array.validity())
89
},
90
Struct => {
91
let array = array.as_any().downcast_ref::<StructArray>().unwrap();
92
array
93
.values()
94
.iter()
95
.map(|x| x.as_ref())
96
.map(estimated_bytes_size)
97
.sum::<usize>()
98
+ validity_size(array.validity())
99
},
100
Union => {
101
let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
102
let types = array.types().len() * size_of::<i8>();
103
let offsets = array
104
.offsets()
105
.as_ref()
106
.map(|x| x.len() * size_of::<i32>())
107
.unwrap_or_default();
108
let fields = array
109
.fields()
110
.iter()
111
.map(|x| x.as_ref())
112
.map(estimated_bytes_size)
113
.sum::<usize>();
114
types + offsets + fields
115
},
116
Dictionary(key_type) => match_integer_type!(key_type, |$T| {
117
let array = array
118
.as_any()
119
.downcast_ref::<DictionaryArray<$T>>()
120
.unwrap();
121
estimated_bytes_size(array.keys()) + estimated_bytes_size(array.values().as_ref())
122
}),
123
Utf8View => binview_size::<str>(array.as_any().downcast_ref().unwrap()),
124
BinaryView => binview_size::<[u8]>(array.as_any().downcast_ref().unwrap()),
125
Map => {
126
let array = array.as_any().downcast_ref::<MapArray>().unwrap();
127
let offsets = array.offsets().len_proxy() * size_of::<i32>();
128
offsets + estimated_bytes_size(array.field().as_ref()) + validity_size(array.validity())
129
},
130
}
131
}
132
133