Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/io/avro/read/nested.rs
7884 views
1
use polars_error::{PolarsResult, polars_err};
2
3
use crate::array::*;
4
use crate::bitmap::*;
5
use crate::datatypes::*;
6
use crate::offset::{Offset, Offsets};
7
8
/// Auxiliary struct
9
#[derive(Debug)]
10
pub struct DynMutableListArray<O: Offset> {
11
dtype: ArrowDataType,
12
offsets: Offsets<O>,
13
values: Box<dyn MutableArray>,
14
validity: Option<MutableBitmap>,
15
}
16
17
impl<O: Offset> DynMutableListArray<O> {
18
pub fn new_from(values: Box<dyn MutableArray>, dtype: ArrowDataType, capacity: usize) -> Self {
19
assert_eq!(values.len(), 0);
20
ListArray::<O>::get_child_field(&dtype);
21
Self {
22
dtype,
23
offsets: Offsets::<O>::with_capacity(capacity),
24
values,
25
validity: None,
26
}
27
}
28
29
/// The values
30
pub fn mut_values(&mut self) -> &mut dyn MutableArray {
31
self.values.as_mut()
32
}
33
34
#[inline]
35
pub fn try_push_valid(&mut self) -> PolarsResult<()> {
36
let total_length = self.values.len();
37
let offset = self.offsets.last().to_usize();
38
let length = total_length
39
.checked_sub(offset)
40
.ok_or_else(|| polars_err!(ComputeError: "overflow"))?;
41
42
self.offsets.try_push(length)?;
43
if let Some(validity) = &mut self.validity {
44
validity.push(true)
45
}
46
Ok(())
47
}
48
49
#[inline]
50
fn push_null(&mut self) {
51
self.offsets.extend_constant(1);
52
match &mut self.validity {
53
Some(validity) => validity.push(false),
54
None => self.init_validity(),
55
}
56
}
57
58
fn init_validity(&mut self) {
59
let len = self.offsets.len_proxy();
60
61
let mut validity = MutableBitmap::new();
62
validity.extend_constant(len, true);
63
validity.set(len - 1, false);
64
self.validity = Some(validity)
65
}
66
}
67
68
impl<O: Offset> MutableArray for DynMutableListArray<O> {
69
fn len(&self) -> usize {
70
self.offsets.len_proxy()
71
}
72
73
fn validity(&self) -> Option<&MutableBitmap> {
74
self.validity.as_ref()
75
}
76
77
fn as_box(&mut self) -> Box<dyn Array> {
78
ListArray::new(
79
self.dtype.clone(),
80
std::mem::take(&mut self.offsets).into(),
81
self.values.as_box(),
82
std::mem::take(&mut self.validity).map(|x| x.into()),
83
)
84
.boxed()
85
}
86
87
fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
88
ListArray::new(
89
self.dtype.clone(),
90
std::mem::take(&mut self.offsets).into(),
91
self.values.as_box(),
92
std::mem::take(&mut self.validity).map(|x| x.into()),
93
)
94
.arced()
95
}
96
97
fn dtype(&self) -> &ArrowDataType {
98
&self.dtype
99
}
100
101
fn as_any(&self) -> &dyn std::any::Any {
102
self
103
}
104
105
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
106
self
107
}
108
109
#[inline]
110
fn push_null(&mut self) {
111
self.push_null()
112
}
113
114
fn reserve(&mut self, _: usize) {
115
todo!();
116
}
117
118
fn shrink_to_fit(&mut self) {
119
todo!();
120
}
121
}
122
123
#[derive(Debug)]
124
pub struct FixedItemsUtf8Dictionary {
125
dtype: ArrowDataType,
126
keys: MutablePrimitiveArray<i32>,
127
values: Utf8Array<i32>,
128
}
129
130
impl FixedItemsUtf8Dictionary {
131
pub fn with_capacity(values: Utf8Array<i32>, capacity: usize) -> Self {
132
Self {
133
dtype: ArrowDataType::Dictionary(
134
IntegerType::Int32,
135
Box::new(values.dtype().clone()),
136
false,
137
),
138
keys: MutablePrimitiveArray::<i32>::with_capacity(capacity),
139
values,
140
}
141
}
142
143
pub fn push_valid(&mut self, key: i32) {
144
self.keys.push(Some(key))
145
}
146
147
/// pushes a null value
148
pub fn push_null(&mut self) {
149
self.keys.push(None)
150
}
151
}
152
153
impl MutableArray for FixedItemsUtf8Dictionary {
154
fn len(&self) -> usize {
155
self.keys.len()
156
}
157
158
fn validity(&self) -> Option<&MutableBitmap> {
159
self.keys.validity()
160
}
161
162
fn as_box(&mut self) -> Box<dyn Array> {
163
Box::new(
164
DictionaryArray::try_new(
165
self.dtype.clone(),
166
std::mem::take(&mut self.keys).into(),
167
Box::new(self.values.clone()),
168
)
169
.unwrap(),
170
)
171
}
172
173
fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
174
std::sync::Arc::new(
175
DictionaryArray::try_new(
176
self.dtype.clone(),
177
std::mem::take(&mut self.keys).into(),
178
Box::new(self.values.clone()),
179
)
180
.unwrap(),
181
)
182
}
183
184
fn dtype(&self) -> &ArrowDataType {
185
&self.dtype
186
}
187
188
fn as_any(&self) -> &dyn std::any::Any {
189
self
190
}
191
192
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
193
self
194
}
195
196
#[inline]
197
fn push_null(&mut self) {
198
self.push_null()
199
}
200
201
fn reserve(&mut self, _: usize) {
202
todo!();
203
}
204
205
fn shrink_to_fit(&mut self) {
206
todo!();
207
}
208
}
209
210
/// Auxiliary struct
211
#[derive(Debug)]
212
pub struct DynMutableStructArray {
213
dtype: ArrowDataType,
214
length: usize,
215
values: Vec<Box<dyn MutableArray>>,
216
validity: Option<MutableBitmap>,
217
}
218
219
impl DynMutableStructArray {
220
pub fn new(values: Vec<Box<dyn MutableArray>>, dtype: ArrowDataType) -> Self {
221
Self {
222
dtype,
223
length: 0,
224
values,
225
validity: None,
226
}
227
}
228
229
/// The values
230
pub fn mut_values(&mut self, field: usize) -> &mut dyn MutableArray {
231
self.values[field].as_mut()
232
}
233
234
#[inline]
235
pub fn try_push_valid(&mut self) -> PolarsResult<()> {
236
if let Some(validity) = &mut self.validity {
237
validity.push(true)
238
}
239
self.length += 1;
240
Ok(())
241
}
242
243
#[inline]
244
fn push_null(&mut self) {
245
self.values.iter_mut().for_each(|x| x.push_null());
246
self.length += 1;
247
match &mut self.validity {
248
Some(validity) => validity.push(false),
249
None => self.init_validity(),
250
}
251
}
252
253
fn init_validity(&mut self) {
254
let len = self.len();
255
256
let mut validity = MutableBitmap::new();
257
validity.extend_constant(len, true);
258
validity.set(len - 1, false);
259
self.validity = Some(validity)
260
}
261
}
262
263
impl MutableArray for DynMutableStructArray {
264
fn len(&self) -> usize {
265
self.length
266
}
267
268
fn validity(&self) -> Option<&MutableBitmap> {
269
self.validity.as_ref()
270
}
271
272
fn as_box(&mut self) -> Box<dyn Array> {
273
let values = self.values.iter_mut().map(|x| x.as_box()).collect();
274
275
Box::new(StructArray::new(
276
self.dtype.clone(),
277
self.length,
278
values,
279
std::mem::take(&mut self.validity).map(|x| x.into()),
280
))
281
}
282
283
fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
284
let values = self.values.iter_mut().map(|x| x.as_box()).collect();
285
286
std::sync::Arc::new(StructArray::new(
287
self.dtype.clone(),
288
self.length,
289
values,
290
std::mem::take(&mut self.validity).map(|x| x.into()),
291
))
292
}
293
294
fn dtype(&self) -> &ArrowDataType {
295
&self.dtype
296
}
297
298
fn as_any(&self) -> &dyn std::any::Any {
299
self
300
}
301
302
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
303
self
304
}
305
306
#[inline]
307
fn push_null(&mut self) {
308
self.push_null()
309
}
310
311
fn reserve(&mut self, _: usize) {
312
todo!();
313
}
314
315
fn shrink_to_fit(&mut self) {
316
todo!();
317
}
318
}
319
320