Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/compute/aggregate/memory.rs
8422 views
1
use crate::array::*;
2
use crate::bitmap::Bitmap;
3
use crate::datatypes::PhysicalType;
4
use crate::types::Index;
5
pub use crate::types::PrimitiveType;
6
use crate::{match_integer_type, with_match_primitive_type_full};
7
fn validity_size(validity: Option<&Bitmap>) -> usize {
8
validity.as_ref().map(|b| b.as_slice().0.len()).unwrap_or(0)
9
}
10
11
/// Estimates the byte size of an offset-based binary-like array.
///
/// * `$array` - expression exposing `as_any()` for the array to measure
/// * `$ty` - concrete array type to downcast to (the downcast is unwrapped)
/// * `$o` - offset type, used to size each entry of the offsets buffer
macro_rules! dyn_binary {
    ($array:expr, $ty:ty, $o:ty) => {{
        let concrete = $array.as_any().downcast_ref::<$ty>().unwrap();
        let offsets = concrete.offsets().buffer();

        // For Binary/Utf8/List it is the offsets that get sliced, not the
        // values buffer, so the visible value bytes are delimited by the
        // first and the last offset.
        let first = offsets[0] as usize;
        let last = offsets[offsets.len() - 1] as usize;

        let values_bytes = last - first;
        let offsets_bytes = offsets.len() * size_of::<$o>();

        values_bytes + offsets_bytes + validity_size(concrete.validity())
    }};
}
26
27
/// Estimated byte size of a view array, taken from `total_bytes_len()`.
///
/// This deliberately reports the optimal usage instead of summing every
/// buffer: data can be shared across buffers, so a sum would overestimate
/// memory usage and trigger OOC when it is not needed.
fn binview_size<T>(array: &BinaryViewArrayGeneric<T>) -> usize
where
    T: ViewType + ?Sized,
{
    array.total_bytes_len()
}
32
33
/// Returns the total (heap) allocated size of the array in bytes.
34
/// # Implementation
35
/// This estimation is the sum of the size of its buffers, validity, including nested arrays.
36
/// Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
37
/// sum of the sizes computed from this function. In particular, [`StructArray`]'s size is an upper bound.
38
///
39
/// When an array is sliced, its allocated size remains constant because the buffer unchanged.
40
/// However, this function will yield a smaller number. This is because this function returns
41
/// the visible size of the buffer, not its total capacity.
42
///
43
/// FFI buffers are included in this estimation.
44
pub fn estimated_bytes_size(array: &dyn Array) -> usize {
45
use PhysicalType::*;
46
match array.dtype().to_physical_type() {
47
Null => 0,
48
Boolean => {
49
let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
50
array.values().as_slice().0.len() + validity_size(array.validity())
51
},
52
Primitive(PrimitiveType::DaysMs) => {
53
let array = array.as_any().downcast_ref::<DaysMsArray>().unwrap();
54
array.values().len() * size_of::<i32>() * 2 + validity_size(array.validity())
55
},
56
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
57
let array = array
58
.as_any()
59
.downcast_ref::<PrimitiveArray<$T>>()
60
.unwrap();
61
62
array.values().len() * size_of::<$T>() + validity_size(array.validity())
63
}),
64
Binary => dyn_binary!(array, BinaryArray<i32>, i32),
65
FixedSizeBinary => {
66
let array = array
67
.as_any()
68
.downcast_ref::<FixedSizeBinaryArray>()
69
.unwrap();
70
array.values().len() + validity_size(array.validity())
71
},
72
LargeBinary => dyn_binary!(array, BinaryArray<i64>, i64),
73
Utf8 => dyn_binary!(array, Utf8Array<i32>, i32),
74
LargeUtf8 => dyn_binary!(array, Utf8Array<i64>, i64),
75
List => {
76
let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();
77
estimated_bytes_size(
78
array
79
.values()
80
.sliced(
81
array.offsets().first().to_usize(),
82
array.offsets().range().to_usize(),
83
)
84
.as_ref(),
85
) + array.offsets().len_proxy() * size_of::<i32>()
86
+ validity_size(array.validity())
87
},
88
FixedSizeList => {
89
let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
90
estimated_bytes_size(array.values().as_ref()) + validity_size(array.validity())
91
},
92
LargeList => {
93
let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();
94
estimated_bytes_size(
95
array
96
.values()
97
.sliced(
98
array.offsets().first().to_usize(),
99
array.offsets().range().to_usize(),
100
)
101
.as_ref(),
102
) + array.offsets().len_proxy() * size_of::<i64>()
103
+ validity_size(array.validity())
104
},
105
Struct => {
106
let array = array.as_any().downcast_ref::<StructArray>().unwrap();
107
array
108
.values()
109
.iter()
110
.map(|x| x.as_ref())
111
.map(estimated_bytes_size)
112
.sum::<usize>()
113
+ validity_size(array.validity())
114
},
115
Union => {
116
let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
117
let types = array.types().len() * size_of::<i8>();
118
let offsets = array
119
.offsets()
120
.as_ref()
121
.map(|x| x.len() * size_of::<i32>())
122
.unwrap_or_default();
123
let fields = array
124
.fields()
125
.iter()
126
.map(|x| x.as_ref())
127
.map(estimated_bytes_size)
128
.sum::<usize>();
129
types + offsets + fields
130
},
131
Dictionary(key_type) => match_integer_type!(key_type, |$T| {
132
let array = array
133
.as_any()
134
.downcast_ref::<DictionaryArray<$T>>()
135
.unwrap();
136
estimated_bytes_size(array.keys()) + estimated_bytes_size(array.values().as_ref())
137
}),
138
Utf8View => binview_size::<str>(array.as_any().downcast_ref().unwrap()),
139
BinaryView => binview_size::<[u8]>(array.as_any().downcast_ref().unwrap()),
140
Map => {
141
let array = array.as_any().downcast_ref::<MapArray>().unwrap();
142
let offsets = array.offsets().len_proxy() * size_of::<i32>();
143
offsets + estimated_bytes_size(array.field().as_ref()) + validity_size(array.validity())
144
},
145
}
146
}
147
148