Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/into.rs
8458 views
1
#[cfg(any(
2
feature = "dtype-datetime",
3
feature = "dtype-date",
4
feature = "dtype-duration",
5
feature = "dtype-time"
6
))]
7
use polars_compute::cast::cast_default;
8
use polars_compute::cast::cast_unchecked;
9
10
use crate::prelude::*;
11
12
impl Series {
13
/// Returns a reference to the Arrow ArrayRef
14
#[inline]
15
pub fn array_ref(&self, chunk_idx: usize) -> &ArrayRef {
16
&self.chunks()[chunk_idx] as &ArrayRef
17
}
18
19
/// Convert a chunk in the Series to the correct Arrow type.
20
/// This conversion is needed because polars doesn't use a
21
/// 1 on 1 mapping for logical/categoricals, etc.
22
pub fn to_arrow(&self, chunk_idx: usize, compat_level: CompatLevel) -> ArrayRef {
23
self.to_arrow_with_field(chunk_idx, compat_level, None)
24
.unwrap()
25
}
26
27
pub fn to_arrow_with_field(
28
&self,
29
chunk_idx: usize,
30
compat_level: CompatLevel,
31
output_arrow_field: Option<&ArrowField>,
32
) -> PolarsResult<ArrayRef> {
33
ToArrowConverter {
34
compat_level,
35
#[cfg(feature = "dtype-categorical")]
36
categorical_converter: {
37
let mut categorical_converter =
38
crate::series::categorical_to_arrow::CategoricalToArrowConverter {
39
converters: Default::default(),
40
persist_remap: false,
41
output_keys_only: false,
42
};
43
44
categorical_converter.initialize(self.dtype());
45
46
categorical_converter
47
},
48
}
49
.array_to_arrow(
50
self.chunks().get(chunk_idx).unwrap().as_ref(),
51
self.dtype(),
52
output_arrow_field,
53
)
54
}
55
}
56
57
pub struct ToArrowConverter {
58
pub compat_level: CompatLevel,
59
#[cfg(feature = "dtype-categorical")]
60
pub categorical_converter: crate::series::categorical_to_arrow::CategoricalToArrowConverter,
61
}
62
63
impl ToArrowConverter {
64
/// Returns an error if `output_arrow_field` was provided and does not match the output data type.
65
pub fn array_to_arrow(
66
&mut self,
67
array: &dyn Array,
68
dtype: &DataType,
69
output_arrow_field: Option<&ArrowField>,
70
) -> PolarsResult<Box<dyn Array>> {
71
let out = self.array_to_arrow_impl(array, dtype, output_arrow_field)?;
72
73
if let Some(field) = output_arrow_field {
74
polars_ensure!(
75
field.is_nullable || !out.has_nulls(),
76
SchemaMismatch:
77
"to_arrow(): nullable is false but array contained {} NULLs (arrow field: {:?})",
78
out.null_count(), field,
79
);
80
81
// Don't eq nested types (they will recurse here with the inner types).
82
if (!field.dtype().is_nested()
83
|| matches!(field.dtype(), ArrowDataType::Dictionary(..)))
84
&& out.dtype() != field.dtype()
85
{
86
polars_bail!(
87
SchemaMismatch:
88
"to_arrow(): provided dtype ({:?}) does not match output dtype ({:?})",
89
field.dtype(), out.dtype()
90
)
91
}
92
}
93
94
Ok(out)
95
}
96
97
fn array_to_arrow_impl(
98
&mut self,
99
array: &dyn Array,
100
dtype: &DataType,
101
output_arrow_field: Option<&ArrowField>,
102
) -> PolarsResult<Box<dyn Array>> {
103
Ok(match dtype {
104
// make sure that we recursively apply all logical types.
105
#[cfg(feature = "dtype-struct")]
106
DataType::Struct(fields) => {
107
use arrow::array::StructArray;
108
let arr: &StructArray = array.as_any().downcast_ref().unwrap();
109
110
let expected_output_fields: &[ArrowField] = match output_arrow_field {
111
Some(
112
field @ ArrowField {
113
name: _,
114
dtype: ArrowDataType::Struct(fields),
115
is_nullable: _,
116
metadata: _,
117
},
118
) if fields.len() == arr.fields().len()
119
&& fields
120
.iter()
121
.zip(arr.fields())
122
.all(|(l, r)| l.name() == r.name()) =>
123
{
124
fields.as_slice()
125
},
126
Some(ArrowField { dtype, .. }) => polars_bail!(
127
SchemaMismatch:
128
"to_arrow(): struct dtype mismatch: {:?} != expected: {:?}",
129
dtype, arr.dtype(),
130
),
131
None => &[],
132
};
133
134
let values: Vec<ArrayRef> = arr
135
.values()
136
.iter()
137
.zip(fields.iter())
138
.enumerate()
139
.map(|(i, (values, field))| {
140
self.array_to_arrow(
141
values.as_ref(),
142
field.dtype(),
143
expected_output_fields.get(i),
144
)
145
})
146
.collect::<PolarsResult<_>>()?;
147
148
let converted_arrow_fields: Vec<ArrowField> = fields
149
.iter()
150
.map(|x| (x.name().clone(), x.dtype()))
151
.zip(values.iter().map(|x| x.dtype()))
152
.enumerate()
153
.map(|(i, ((name, dtype), converted_arrow_dtype))| {
154
create_arrow_field(
155
name,
156
dtype,
157
converted_arrow_dtype,
158
self.compat_level,
159
opt_field_is_nullable(expected_output_fields.get(i)),
160
)
161
})
162
.collect();
163
164
StructArray::new(
165
ArrowDataType::Struct(converted_arrow_fields),
166
arr.len(),
167
values,
168
arr.validity().cloned(),
169
)
170
.boxed()
171
},
172
DataType::List(inner) => {
173
let arr: &ListArray<i64> = array.as_any().downcast_ref().unwrap();
174
175
let expected_inner_output_field: Option<&ArrowField> = match output_arrow_field {
176
Some(ArrowField {
177
name: _,
178
dtype: ArrowDataType::LargeList(inner_field),
179
is_nullable: _,
180
metadata: _,
181
}) if inner_field.name() == &LIST_VALUES_NAME => Some(inner_field),
182
Some(ArrowField { dtype, .. }) => polars_bail!(
183
SchemaMismatch:
184
"to_arrow(): list dtype mismatch: {:?} != expected: {:?}",
185
dtype, arr.dtype(),
186
),
187
None => None,
188
};
189
190
let new_values =
191
self.array_to_arrow(arr.values().as_ref(), inner, expected_inner_output_field)?;
192
193
let arr = ListArray::<i64>::new(
194
ArrowDataType::LargeList(Box::new(create_arrow_field(
195
LIST_VALUES_NAME,
196
inner.as_ref(),
197
new_values.dtype(),
198
self.compat_level,
199
opt_field_is_nullable(expected_inner_output_field),
200
))),
201
arr.offsets().clone(),
202
new_values,
203
arr.validity().cloned(),
204
);
205
Box::new(arr)
206
},
207
#[cfg(feature = "dtype-array")]
208
DataType::Array(inner, width) => {
209
use arrow::array::FixedSizeListArray;
210
211
let arr: &FixedSizeListArray = array.as_any().downcast_ref().unwrap();
212
213
let expected_inner_output_field: Option<&ArrowField> = match output_arrow_field {
214
Some(
215
field @ ArrowField {
216
name: _,
217
dtype: ArrowDataType::FixedSizeList(inner_field, width),
218
is_nullable: _,
219
metadata: _,
220
},
221
) if *width == arr.size() && inner_field.name() == &LIST_VALUES_NAME => {
222
Some(inner_field)
223
},
224
Some(ArrowField { dtype, .. }) => polars_bail!(
225
SchemaMismatch:
226
"to_arrow(): fixed-size list dtype mismatch: {:?} != expected: {:?}",
227
dtype, arr.dtype(),
228
),
229
None => None,
230
};
231
232
let new_values =
233
self.array_to_arrow(arr.values().as_ref(), inner, expected_inner_output_field)?;
234
235
let arr = FixedSizeListArray::new(
236
ArrowDataType::FixedSizeList(
237
Box::new(create_arrow_field(
238
LIST_VALUES_NAME,
239
inner.as_ref(),
240
new_values.dtype(),
241
self.compat_level,
242
opt_field_is_nullable(expected_inner_output_field),
243
)),
244
*width,
245
),
246
arr.len(),
247
new_values,
248
arr.validity().cloned(),
249
);
250
Box::new(arr)
251
},
252
#[cfg(feature = "dtype-categorical")]
253
DataType::Categorical(_, _) | DataType::Enum(_, _) => self
254
.categorical_converter
255
.array_to_arrow(array, dtype, self.compat_level),
256
#[cfg(feature = "dtype-date")]
257
DataType::Date => {
258
cast_default(array, &DataType::Date.to_arrow(self.compat_level)).unwrap()
259
},
260
#[cfg(feature = "dtype-datetime")]
261
DataType::Datetime(_, _) => {
262
cast_default(array, &dtype.to_arrow(self.compat_level)).unwrap()
263
},
264
#[cfg(feature = "dtype-duration")]
265
DataType::Duration(_) => {
266
cast_default(array, &dtype.to_arrow(self.compat_level)).unwrap()
267
},
268
#[cfg(feature = "dtype-time")]
269
DataType::Time => {
270
cast_default(array, &DataType::Time.to_arrow(self.compat_level)).unwrap()
271
},
272
#[cfg(feature = "dtype-decimal")]
273
DataType::Decimal(_, _) => array
274
.as_any()
275
.downcast_ref::<arrow::array::PrimitiveArray<i128>>()
276
.unwrap()
277
.clone()
278
.to(dtype.to_arrow(CompatLevel::newest()))
279
.to_boxed(),
280
#[cfg(feature = "object")]
281
DataType::Object(_) => {
282
use crate::chunked_array::object::builder::object_series_to_arrow_array;
283
object_series_to_arrow_array(&unsafe {
284
Series::from_chunks_and_dtype_unchecked(
285
PlSmallStr::EMPTY,
286
vec![array.to_boxed()],
287
dtype,
288
)
289
})
290
},
291
DataType::String => {
292
if self.compat_level.0 >= 1 {
293
array.to_boxed()
294
} else {
295
cast_unchecked(array, &ArrowDataType::LargeUtf8).unwrap()
296
}
297
},
298
DataType::Binary => {
299
if self.compat_level.0 >= 1 {
300
array.to_boxed()
301
} else {
302
cast_unchecked(array, &ArrowDataType::LargeBinary).unwrap()
303
}
304
},
305
#[cfg(feature = "dtype-extension")]
306
DataType::Extension(typ, storage_dtype) => {
307
use arrow::datatypes::ExtensionType;
308
309
let output_ext_name: PlSmallStr = typ.name().into();
310
let output_ext_md: Option<PlSmallStr> =
311
typ.serialize_metadata().map(|md| md.into());
312
313
let expected_inner_output_field: Option<ArrowField> = match output_arrow_field {
314
Some(
315
field @ ArrowField {
316
name: _,
317
dtype: ArrowDataType::Extension(ext_type),
318
is_nullable: _,
319
metadata: _,
320
},
321
) if {
322
let ExtensionType {
323
name,
324
inner: _,
325
metadata,
326
} = ext_type.as_ref();
327
328
name == &output_ext_name
329
&& metadata.as_ref().filter(|x| !x.is_empty())
330
== output_ext_md.as_ref().filter(|x| !x.is_empty())
331
} =>
332
{
333
let ExtensionType {
334
name,
335
inner,
336
metadata: _,
337
} = ext_type.as_ref();
338
339
Some(create_arrow_field(
340
name.clone(),
341
storage_dtype.as_ref(),
342
inner,
343
self.compat_level,
344
true,
345
))
346
},
347
Some(ArrowField { dtype, .. }) => {
348
let expected_inner = self
349
.array_to_arrow(array.sliced(0, 0).as_ref(), storage_dtype, None)
350
.unwrap()
351
.dtype()
352
.clone();
353
354
let expected = ArrowDataType::Extension(Box::new(ExtensionType {
355
name: output_ext_name,
356
inner: expected_inner,
357
metadata: output_ext_md,
358
}));
359
360
polars_bail!(
361
SchemaMismatch:
362
"to_arrow(): extension dtype mismatch: {:?} != expected: {:?}",
363
dtype, expected,
364
)
365
},
366
None => None,
367
};
368
369
let mut arr = self.array_to_arrow(
370
array,
371
storage_dtype,
372
expected_inner_output_field.as_ref(),
373
)?;
374
375
*arr.dtype_mut() = ArrowDataType::Extension(Box::new(ExtensionType {
376
name: output_ext_name,
377
inner: arr.dtype().clone(),
378
metadata: output_ext_md,
379
}));
380
arr
381
},
382
_ => {
383
assert!(!dtype.is_logical());
384
array.to_boxed()
385
},
386
})
387
}
388
}
389
390
fn create_arrow_field(
391
name: PlSmallStr,
392
dtype: &DataType,
393
arrow_dtype: &ArrowDataType,
394
compat_level: CompatLevel,
395
is_nullable: bool,
396
) -> ArrowField {
397
match (dtype, arrow_dtype) {
398
#[cfg(feature = "dtype-categorical")]
399
(DataType::Categorical(..) | DataType::Enum(..), ArrowDataType::Dictionary(_, _, _)) => {
400
// Sets _PL_ metadata
401
let mut out = dtype.to_arrow_field(name, compat_level);
402
out.is_nullable = is_nullable;
403
out
404
},
405
_ => ArrowField::new(name, arrow_dtype.clone(), is_nullable),
406
}
407
}
408
409
fn opt_field_is_nullable(opt_field: Option<&ArrowField>) -> bool {
410
opt_field.is_none_or(|x| x.is_nullable)
411
}
412
413