Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/array/map/mod.rs
6939 views
1
use super::specification::try_check_offsets_bounds;
2
use super::{Array, Splitable, new_empty_array};
3
use crate::bitmap::Bitmap;
4
use crate::datatypes::{ArrowDataType, Field};
5
use crate::offset::OffsetsBuffer;
6
7
mod ffi;
8
pub(super) mod fmt;
9
mod iterator;
10
11
use polars_error::{PolarsResult, polars_bail};
12
13
/// An array representing a (key, value), both of arbitrary logical types.
14
#[derive(Clone)]
15
pub struct MapArray {
16
dtype: ArrowDataType,
17
// invariant: field.len() == offsets.len()
18
offsets: OffsetsBuffer<i32>,
19
field: Box<dyn Array>,
20
// invariant: offsets.len() - 1 == Bitmap::len()
21
validity: Option<Bitmap>,
22
}
23
24
impl MapArray {
25
/// Returns a new [`MapArray`].
26
/// # Errors
27
/// This function errors iff:
28
/// * `offsets.last()` is greater than `field.len()`
29
/// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Map`]
30
/// * The fields' `dtype` is not equal to the inner field of `dtype`
31
/// * The validity is not `None` and its length is different from `offsets.len() - 1`.
32
pub fn try_new(
33
dtype: ArrowDataType,
34
offsets: OffsetsBuffer<i32>,
35
field: Box<dyn Array>,
36
validity: Option<Bitmap>,
37
) -> PolarsResult<Self> {
38
try_check_offsets_bounds(&offsets, field.len())?;
39
40
let inner_field = Self::try_get_field(&dtype)?;
41
if let ArrowDataType::Struct(inner) = inner_field.dtype() {
42
if inner.len() != 2 {
43
polars_bail!(ComputeError: "MapArray's inner `Struct` must have 2 fields (keys and maps)")
44
}
45
} else {
46
polars_bail!(ComputeError: "MapArray expects `DataType::Struct` as its inner logical type")
47
}
48
if field.dtype() != inner_field.dtype() {
49
polars_bail!(ComputeError: "MapArray expects `field.dtype` to match its inner DataType")
50
}
51
52
if validity
53
.as_ref()
54
.is_some_and(|validity| validity.len() != offsets.len_proxy())
55
{
56
polars_bail!(ComputeError: "validity mask length must match the number of values")
57
}
58
59
Ok(Self {
60
dtype,
61
field,
62
offsets,
63
validity,
64
})
65
}
66
67
/// Creates a new [`MapArray`].
68
/// # Panics
69
/// * `offsets.last()` is greater than `field.len()`.
70
/// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::Map`],
71
/// * The validity is not `None` and its length is different from `offsets.len() - 1`.
72
pub fn new(
73
dtype: ArrowDataType,
74
offsets: OffsetsBuffer<i32>,
75
field: Box<dyn Array>,
76
validity: Option<Bitmap>,
77
) -> Self {
78
Self::try_new(dtype, offsets, field, validity).unwrap()
79
}
80
81
/// Returns a new null [`MapArray`] of `length`.
82
pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
83
let field = new_empty_array(Self::get_field(&dtype).dtype().clone());
84
Self::new(
85
dtype,
86
vec![0i32; 1 + length].try_into().unwrap(),
87
field,
88
Some(Bitmap::new_zeroed(length)),
89
)
90
}
91
92
/// Returns a new empty [`MapArray`].
93
pub fn new_empty(dtype: ArrowDataType) -> Self {
94
let field = new_empty_array(Self::get_field(&dtype).dtype().clone());
95
Self::new(dtype, OffsetsBuffer::default(), field, None)
96
}
97
}
98
99
impl MapArray {
100
/// Returns a slice of this [`MapArray`].
101
/// # Panics
102
/// panics iff `offset + length > self.len()`
103
pub fn slice(&mut self, offset: usize, length: usize) {
104
assert!(
105
offset + length <= self.len(),
106
"the offset of the new Buffer cannot exceed the existing length"
107
);
108
unsafe { self.slice_unchecked(offset, length) }
109
}
110
111
/// Returns a slice of this [`MapArray`].
112
///
113
/// # Safety
114
/// The caller must ensure that `offset + length < self.len()`.
115
#[inline]
116
pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
117
self.validity = self
118
.validity
119
.take()
120
.map(|bitmap| bitmap.sliced_unchecked(offset, length))
121
.filter(|bitmap| bitmap.unset_bits() > 0);
122
self.offsets.slice_unchecked(offset, length + 1);
123
}
124
125
impl_sliced!();
126
impl_mut_validity!();
127
impl_into_array!();
128
129
pub(crate) fn try_get_field(dtype: &ArrowDataType) -> PolarsResult<&Field> {
130
if let ArrowDataType::Map(field, _) = dtype.to_logical_type() {
131
Ok(field.as_ref())
132
} else {
133
polars_bail!(ComputeError: "The dtype's logical type must be DataType::Map")
134
}
135
}
136
137
pub(crate) fn get_field(dtype: &ArrowDataType) -> &Field {
138
Self::try_get_field(dtype).unwrap()
139
}
140
}
141
142
// Accessors
143
impl MapArray {
144
/// Returns the length of this array
145
#[inline]
146
pub fn len(&self) -> usize {
147
self.offsets.len_proxy()
148
}
149
150
/// returns the offsets
151
#[inline]
152
pub fn offsets(&self) -> &OffsetsBuffer<i32> {
153
&self.offsets
154
}
155
156
/// Returns the field (guaranteed to be a `Struct`)
157
#[inline]
158
pub fn field(&self) -> &Box<dyn Array> {
159
&self.field
160
}
161
162
/// Returns the element at index `i`.
163
#[inline]
164
pub fn value(&self, i: usize) -> Box<dyn Array> {
165
assert!(i < self.len());
166
unsafe { self.value_unchecked(i) }
167
}
168
169
/// Returns the element at index `i`.
170
///
171
/// # Safety
172
/// Assumes that the `i < self.len`.
173
#[inline]
174
pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
175
// soundness: the invariant of the function
176
let (start, end) = self.offsets.start_end_unchecked(i);
177
let length = end - start;
178
179
// soundness: the invariant of the struct
180
self.field.sliced_unchecked(start, length)
181
}
182
}
183
184
impl Array for MapArray {
185
impl_common_array!();
186
187
fn validity(&self) -> Option<&Bitmap> {
188
self.validity.as_ref()
189
}
190
191
#[inline]
192
fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
193
Box::new(self.clone().with_validity(validity))
194
}
195
}
196
197
impl Splitable for MapArray {
    /// A split point is valid anywhere from `0` up to and including `self.len()`.
    fn check_bound(&self, offset: usize) -> bool {
        offset <= self.len()
    }

    /// Splits the array at slot `offset` into a left and a right half.
    ///
    /// Offsets and validity are split; each half keeps a clone of the whole child array and
    /// of the dtype.
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        let (head_offsets, tail_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
        let (head_validity, tail_validity) = unsafe { self.validity.split_at_unchecked(offset) };

        let head = Self {
            dtype: self.dtype.clone(),
            offsets: head_offsets,
            field: self.field.clone(),
            validity: head_validity,
        };
        let tail = Self {
            dtype: self.dtype.clone(),
            offsets: tail_offsets,
            field: self.field.clone(),
            validity: tail_validity,
        };

        (head, tail)
    }
}
222
223