Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-compute/src/cast/mod.rs
6939 views
1
//! Defines different casting operators such as [`cast`] or [`primitive_to_binary`].
2
3
mod binary_to;
4
mod binview_to;
5
mod boolean_to;
6
mod decimal_to;
7
mod dictionary_to;
8
mod primitive_to;
9
mod utf8_to;
10
11
use arrow::bitmap::MutableBitmap;
12
pub use binary_to::*;
13
#[cfg(feature = "dtype-decimal")]
14
pub use binview_to::binview_to_decimal;
15
use binview_to::utf8view_to_primitive_dyn;
16
pub use binview_to::utf8view_to_utf8;
17
pub use boolean_to::*;
18
pub use decimal_to::*;
19
pub mod temporal;
20
use arrow::array::*;
21
use arrow::datatypes::*;
22
use arrow::match_integer_type;
23
use arrow::offset::{Offset, Offsets};
24
use binview_to::{
25
binview_to_dictionary, utf8view_to_date32_dyn, utf8view_to_dictionary,
26
utf8view_to_naive_timestamp_dyn, view_to_binary,
27
};
28
pub use binview_to::{binview_to_fixed_size_list_dyn, binview_to_primitive_dyn};
29
use dictionary_to::*;
30
use polars_error::{PolarsResult, polars_bail, polars_ensure, polars_err};
31
use polars_utils::IdxSize;
32
pub use primitive_to::*;
33
use temporal::utf8view_to_timestamp;
34
pub use utf8_to::*;
35
36
/// Options that steer how the cast kernels behave.
#[derive(Clone, Copy, Debug, Default)]
pub struct CastOptionsImpl {
    /// Defaults to `false`.
    ///
    /// Decides what happens to a value that overflows the target type: with `false`
    /// (the default) it becomes `None`, with `true` it is wrapped
    /// (i.e. `256i16 as u8 = 0`, vectorized).
    /// Setting this to `true` is 5-6x faster for numeric types.
    pub wrapped: bool,
    /// Defaults to `false`.
    ///
    /// Whether to cast to an integer on a best-effort basis.
    pub partial: bool,
}

impl CastOptionsImpl {
    /// Options for an unchecked cast: wrapping is enabled, partial casting is disabled.
    pub fn unchecked() -> Self {
        Self {
            wrapped: true,
            ..Self::default()
        }
    }

    /// Returns a copy of `self` whose `wrapped` flag is set to `v`; `self` is left untouched.
    fn with_wrapped(&self, v: bool) -> Self {
        Self {
            wrapped: v,
            ..*self
        }
    }
}
64
65
/// Downcasts a dynamically typed array and forwards it to a typed kernel.
///
/// `$from` must expose `as_any()`; the result is downcast (via `downcast_ref`) to
/// whatever concrete type `$expr` expects as its first parameter. `$expr` is then
/// called with that reference followed by any extra arguments, and its return value
/// is wrapped as `Ok(Box::new(..))`.
///
/// Any number of extra arguments is accepted, so the previous one-, two-, three- and
/// four-argument call shapes all keep working unchanged.
///
/// # Panics
/// Panics (through `unwrap`) when `$from` is not of the concrete type `$expr` expects.
macro_rules! primitive_dyn {
    ($from:expr, $expr:tt $(, $arg:expr)*) => {{
        let from = $from.as_any().downcast_ref().unwrap();
        Ok(Box::new($expr(from $(, $arg)*)))
    }};
}
83
84
fn cast_struct(
85
array: &StructArray,
86
to_type: &ArrowDataType,
87
options: CastOptionsImpl,
88
) -> PolarsResult<StructArray> {
89
let values = array.values();
90
let fields = StructArray::get_fields(to_type);
91
let new_values = values
92
.iter()
93
.zip(fields)
94
.map(|(arr, field)| cast(arr.as_ref(), field.dtype(), options))
95
.collect::<PolarsResult<Vec<_>>>()?;
96
97
Ok(StructArray::new(
98
to_type.clone(),
99
array.len(),
100
new_values,
101
array.validity().cloned(),
102
))
103
}
104
105
fn cast_list<O: Offset>(
106
array: &ListArray<O>,
107
to_type: &ArrowDataType,
108
options: CastOptionsImpl,
109
) -> PolarsResult<ListArray<O>> {
110
let values = array.values();
111
let new_values = cast(
112
values.as_ref(),
113
ListArray::<O>::get_child_type(to_type),
114
options,
115
)?;
116
117
Ok(ListArray::<O>::new(
118
to_type.clone(),
119
array.offsets().clone(),
120
new_values,
121
array.validity().cloned(),
122
))
123
}
124
125
fn cast_list_to_large_list(array: &ListArray<i32>, to_type: &ArrowDataType) -> ListArray<i64> {
126
let offsets = array.offsets().into();
127
128
ListArray::<i64>::new(
129
to_type.clone(),
130
offsets,
131
array.values().clone(),
132
array.validity().cloned(),
133
)
134
}
135
136
fn cast_large_to_list(array: &ListArray<i64>, to_type: &ArrowDataType) -> ListArray<i32> {
137
let offsets = array.offsets().try_into().expect("Convertme to error");
138
139
ListArray::<i32>::new(
140
to_type.clone(),
141
offsets,
142
array.values().clone(),
143
array.validity().cloned(),
144
)
145
}
146
147
fn cast_fixed_size_list_to_list<O: Offset>(
148
fixed: &FixedSizeListArray,
149
to_type: &ArrowDataType,
150
options: CastOptionsImpl,
151
) -> PolarsResult<ListArray<O>> {
152
let new_values = cast(
153
fixed.values().as_ref(),
154
ListArray::<O>::get_child_type(to_type),
155
options,
156
)?;
157
158
let offsets = (0..=fixed.len())
159
.map(|ix| O::from_as_usize(ix * fixed.size()))
160
.collect::<Vec<_>>();
161
// SAFETY: offsets _are_ monotonically increasing
162
let offsets = unsafe { Offsets::new_unchecked(offsets) };
163
164
Ok(ListArray::<O>::new(
165
to_type.clone(),
166
offsets.into(),
167
new_values,
168
fixed.validity().cloned(),
169
))
170
}
171
172
pub(super) fn cast_list_to_fixed_size_list<O: Offset>(
173
list: &ListArray<O>,
174
inner: &Field,
175
size: usize,
176
options: CastOptionsImpl,
177
) -> PolarsResult<FixedSizeListArray> {
178
let null_cnt = list.null_count();
179
let new_values = if null_cnt == 0 {
180
let start_offset = list.offsets().first().to_usize();
181
let offsets = list.offsets().buffer();
182
183
let mut is_valid = true;
184
for (i, offset) in offsets.iter().enumerate() {
185
is_valid &= offset.to_usize() == start_offset + i * size;
186
}
187
188
polars_ensure!(is_valid, ComputeError: "not all elements have the specified width {size}");
189
190
let sliced_values = list
191
.values()
192
.sliced(start_offset, list.offsets().range().to_usize());
193
cast(sliced_values.as_ref(), inner.dtype(), options)?
194
} else {
195
let offsets = list.offsets().as_slice();
196
// Check the lengths of each list are equal to the fixed size.
197
// SAFETY: we know the index is in bound.
198
let mut expected_offset = unsafe { *offsets.get_unchecked(0) } + O::from_as_usize(size);
199
for i in 1..=list.len() {
200
// SAFETY: we know the index is in bound.
201
let current_offset = unsafe { *offsets.get_unchecked(i) };
202
if list.is_null(i - 1) {
203
expected_offset = current_offset + O::from_as_usize(size);
204
} else {
205
polars_ensure!(current_offset == expected_offset, ComputeError:
206
"not all elements have the specified width {size}");
207
expected_offset += O::from_as_usize(size);
208
}
209
}
210
211
// Build take indices for the values. This is used to fill in the null slots.
212
let mut indices =
213
MutablePrimitiveArray::<IdxSize>::with_capacity(list.values().len() + null_cnt * size);
214
for i in 0..list.len() {
215
if list.is_null(i) {
216
indices.extend_constant(size, None)
217
} else {
218
// SAFETY: we know the index is in bound.
219
let current_offset = unsafe { *offsets.get_unchecked(i) };
220
for j in 0..size {
221
indices.push(Some(
222
(current_offset + O::from_as_usize(j)).to_usize() as IdxSize
223
));
224
}
225
}
226
}
227
let take_values =
228
unsafe { crate::gather::take_unchecked(list.values().as_ref(), &indices.freeze()) };
229
230
cast(take_values.as_ref(), inner.dtype(), options)?
231
};
232
233
FixedSizeListArray::try_new(
234
ArrowDataType::FixedSizeList(Box::new(inner.clone()), size),
235
list.len(),
236
new_values,
237
list.validity().cloned(),
238
)
239
.map_err(|_| polars_err!(ComputeError: "not all elements have the specified width {size}"))
240
}
241
242
fn cast_list_uint8_to_binary<O: Offset>(list: &ListArray<O>) -> PolarsResult<BinaryViewArray> {
243
let mut views = Vec::with_capacity(list.len());
244
let mut result_validity = MutableBitmap::from_len_set(list.len());
245
246
let u8array: &PrimitiveArray<u8> = list.values().as_any().downcast_ref().unwrap();
247
let slice = u8array.values().as_slice();
248
let mut cloned_buffers = vec![u8array.values().clone()];
249
let mut buf_index = 0;
250
let mut previous_buf_lengths = 0;
251
let validity = list.validity();
252
let internal_validity = list.values().validity();
253
let offsets = list.offsets();
254
255
let mut all_views_inline = true;
256
257
// In a View for BinaryViewArray, both length and offset are u32.
258
#[cfg(not(test))]
259
const MAX_BUF_SIZE: usize = u32::MAX as usize;
260
261
// This allows us to test some invariants without using 4GB of RAM; see mod
262
// tests below.
263
#[cfg(test)]
264
const MAX_BUF_SIZE: usize = 15;
265
266
for index in 0..list.len() {
267
// Check if there's a null instead of a list:
268
if let Some(validity) = validity {
269
// SAFETY: We are generating indexes limited to < list.len().
270
debug_assert!(index < validity.len());
271
if unsafe { !validity.get_bit_unchecked(index) } {
272
debug_assert!(index < result_validity.len());
273
unsafe {
274
result_validity.set_unchecked(index, false);
275
}
276
views.push(View::default());
277
continue;
278
}
279
}
280
281
// SAFETY: We are generating indexes limited to < list.len().
282
debug_assert!(index < offsets.len());
283
let (start, end) = unsafe { offsets.start_end_unchecked(index) };
284
let length = end - start;
285
polars_ensure!(
286
length <= MAX_BUF_SIZE,
287
InvalidOperation: format!("when casting to BinaryView, list lengths must be <= {MAX_BUF_SIZE}")
288
);
289
290
// Check if the list contains nulls:
291
if let Some(internal_validity) = internal_validity {
292
if internal_validity.null_count_range(start, length) > 0 {
293
debug_assert!(index < result_validity.len());
294
unsafe {
295
result_validity.set_unchecked(index, false);
296
}
297
views.push(View::default());
298
continue;
299
}
300
}
301
302
if end - previous_buf_lengths > MAX_BUF_SIZE {
303
// View offsets must fit in u32 (or smaller value when running Rust
304
// tests), and we've determined the end of the next view will be
305
// past that.
306
buf_index += 1;
307
let (previous, next) = cloned_buffers
308
.last()
309
.unwrap()
310
.split_at(start - previous_buf_lengths);
311
debug_assert!(previous.len() <= MAX_BUF_SIZE);
312
previous_buf_lengths += previous.len();
313
*(cloned_buffers.last_mut().unwrap()) = previous;
314
cloned_buffers.push(next);
315
}
316
let view = View::new_from_bytes(
317
&slice[start..end],
318
buf_index,
319
(start - previous_buf_lengths) as u32,
320
);
321
if !view.is_inline() {
322
all_views_inline = false;
323
}
324
debug_assert_eq!(
325
unsafe { view.get_slice_unchecked(&cloned_buffers) },
326
&slice[start..end]
327
);
328
views.push(view);
329
}
330
331
// Optimization: don't actually need buffers if Views are all inline.
332
if all_views_inline {
333
cloned_buffers.clear();
334
}
335
336
let result_buffers = cloned_buffers.into_boxed_slice().into();
337
let result = if cfg!(debug_assertions) {
338
// A safer wrapper around new_unchecked_unknown_md; it shouldn't ever
339
// fail in practice.
340
BinaryViewArrayGeneric::try_new(
341
ArrowDataType::BinaryView,
342
views.into(),
343
result_buffers,
344
result_validity.into(),
345
)?
346
} else {
347
unsafe {
348
BinaryViewArrayGeneric::new_unchecked_unknown_md(
349
ArrowDataType::BinaryView,
350
views.into(),
351
result_buffers,
352
result_validity.into(),
353
// We could compute this ourselves, but we want to make this code
354
// match debug_assertions path as much as possible.
355
None,
356
)
357
}
358
};
359
360
Ok(result)
361
}
362
363
pub fn cast_default(array: &dyn Array, to_type: &ArrowDataType) -> PolarsResult<Box<dyn Array>> {
364
cast(array, to_type, Default::default())
365
}
366
367
pub fn cast_unchecked(array: &dyn Array, to_type: &ArrowDataType) -> PolarsResult<Box<dyn Array>> {
368
cast(array, to_type, CastOptionsImpl::unchecked())
369
}
370
371
/// Cast `array` to the provided data type and return a new [`Array`] with
372
/// type `to_type`, if possible.
373
///
374
/// Behavior:
375
/// * PrimitiveArray to PrimitiveArray: overflowing cast will be None
376
/// * Boolean to Utf8: `true` => '1', `false` => `0`
377
/// * Utf8 to numeric: strings that can't be parsed to numbers return null, float strings
378
/// in integer casts return null
379
/// * Numeric to boolean: 0 returns `false`, any other value returns `true`
380
/// * List to List: the underlying data type is cast
381
/// * Fixed Size List to List: the underlying data type is cast
382
/// * List to Fixed Size List: the offsets are checked for valid order, then the
383
/// underlying type is cast.
384
/// * List of UInt8 to Binary: the list of integers becomes binary data, nulls in the list means it becomes a null
385
/// * Struct to Struct: the underlying fields are cast.
386
/// * PrimitiveArray to List: a list array with 1 value per slot is created
387
/// * Date32 and Date64: precision lost when going to higher interval
388
/// * Time32 and Time64: precision lost when going to higher interval
389
/// * Timestamp and Date{32|64}: precision lost when going to higher interval
390
/// * Temporal to/from backing primitive: zero-copy with data type change
391
///
392
/// Unsupported Casts
393
/// * non-`StructArray` to `StructArray` or `StructArray` to non-`StructArray`
394
/// * List to primitive (other than UInt8)
395
/// * Utf8 to boolean
396
/// * Interval and duration
397
pub fn cast(
398
array: &dyn Array,
399
to_type: &ArrowDataType,
400
options: CastOptionsImpl,
401
) -> PolarsResult<Box<dyn Array>> {
402
use ArrowDataType::*;
403
let from_type = array.dtype();
404
405
// clone array if types are the same
406
if from_type == to_type {
407
return Ok(clone(array));
408
}
409
410
let as_options = options.with_wrapped(true);
411
match (from_type, to_type) {
412
(Null, _) | (_, Null) => Ok(new_null_array(to_type.clone(), array.len())),
413
(Struct(from_fd), Struct(to_fd)) => {
414
polars_ensure!(from_fd.len() == to_fd.len(), InvalidOperation: "Cannot cast struct with different number of fields.");
415
cast_struct(array.as_any().downcast_ref().unwrap(), to_type, options).map(|x| x.boxed())
416
},
417
(Struct(_), _) | (_, Struct(_)) => polars_bail!(InvalidOperation:
418
"Cannot cast from struct to other types"
419
),
420
(Dictionary(index_type, ..), _) => match_integer_type!(index_type, |$T| {
421
dictionary_cast_dyn::<$T>(array, to_type, options)
422
}),
423
(_, Dictionary(index_type, value_type, _)) => match_integer_type!(index_type, |$T| {
424
cast_to_dictionary::<$T>(array, value_type, options)
425
}),
426
// not supported by polars
427
// (List(_), FixedSizeList(inner, size)) => cast_list_to_fixed_size_list::<i32>(
428
// array.as_any().downcast_ref().unwrap(),
429
// inner.as_ref(),
430
// *size,
431
// options,
432
// )
433
// .map(|x| x.boxed()),
434
(LargeList(_), FixedSizeList(inner, size)) => cast_list_to_fixed_size_list::<i64>(
435
array.as_any().downcast_ref().unwrap(),
436
inner.as_ref(),
437
*size,
438
options,
439
)
440
.map(|x| x.boxed()),
441
(FixedSizeList(_, _), List(_)) => cast_fixed_size_list_to_list::<i32>(
442
array.as_any().downcast_ref().unwrap(),
443
to_type,
444
options,
445
)
446
.map(|x| x.boxed()),
447
(FixedSizeList(_, _), LargeList(_)) => cast_fixed_size_list_to_list::<i64>(
448
array.as_any().downcast_ref().unwrap(),
449
to_type,
450
options,
451
)
452
.map(|x| x.boxed()),
453
(List(field), BinaryView) if matches!(field.dtype(), UInt8) => {
454
cast_list_uint8_to_binary::<i32>(array.as_any().downcast_ref().unwrap())
455
.map(|arr| arr.boxed())
456
},
457
(LargeList(field), BinaryView) if matches!(field.dtype(), UInt8) => {
458
cast_list_uint8_to_binary::<i64>(array.as_any().downcast_ref().unwrap())
459
.map(|arr| arr.boxed())
460
},
461
(BinaryView, _) => match to_type {
462
Utf8View => array
463
.as_any()
464
.downcast_ref::<BinaryViewArray>()
465
.unwrap()
466
.to_utf8view()
467
.map(|arr| arr.boxed()),
468
LargeBinary => Ok(binview_to::view_to_binary::<i64>(
469
array.as_any().downcast_ref().unwrap(),
470
)
471
.boxed()),
472
LargeList(inner) if matches!(inner.dtype, ArrowDataType::UInt8) => {
473
let bin_array = view_to_binary::<i64>(array.as_any().downcast_ref().unwrap());
474
Ok(binary_to_list(&bin_array, to_type.clone()).boxed())
475
},
476
_ => polars_bail!(InvalidOperation:
477
"casting from {from_type:?} to {to_type:?} not supported",
478
),
479
},
480
(LargeList(_), LargeList(_)) => {
481
cast_list::<i64>(array.as_any().downcast_ref().unwrap(), to_type, options)
482
.map(|x| x.boxed())
483
},
484
(List(lhs), LargeList(rhs)) if lhs == rhs => {
485
Ok(cast_list_to_large_list(array.as_any().downcast_ref().unwrap(), to_type).boxed())
486
},
487
(LargeList(lhs), List(rhs)) if lhs == rhs => {
488
Ok(cast_large_to_list(array.as_any().downcast_ref().unwrap(), to_type).boxed())
489
},
490
491
(_, List(to)) => {
492
// cast primitive to list's primitive
493
let values = cast(array, &to.dtype, options)?;
494
// create offsets, where if array.len() = 2, we have [0,1,2]
495
let offsets = (0..=array.len() as i32).collect::<Vec<_>>();
496
// SAFETY: offsets _are_ monotonically increasing
497
let offsets = unsafe { Offsets::new_unchecked(offsets) };
498
499
let list_array = ListArray::<i32>::new(to_type.clone(), offsets.into(), values, None);
500
501
Ok(Box::new(list_array))
502
},
503
504
(_, LargeList(to)) if from_type != &LargeBinary => {
505
// cast primitive to list's primitive
506
let values = cast(array, &to.dtype, options)?;
507
// create offsets, where if array.len() = 2, we have [0,1,2]
508
let offsets = (0..=array.len() as i64).collect::<Vec<_>>();
509
// SAFETY: offsets _are_ monotonically increasing
510
let offsets = unsafe { Offsets::new_unchecked(offsets) };
511
512
let list_array = ListArray::<i64>::new(
513
to_type.clone(),
514
offsets.into(),
515
values,
516
array.validity().cloned(),
517
);
518
519
Ok(Box::new(list_array))
520
},
521
522
(Utf8View, _) => {
523
let arr = array.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
524
525
match to_type {
526
BinaryView => Ok(arr.to_binview().boxed()),
527
LargeUtf8 => Ok(binview_to::utf8view_to_utf8::<i64>(arr).boxed()),
528
UInt8 => utf8view_to_primitive_dyn::<u8>(arr, to_type, options),
529
UInt16 => utf8view_to_primitive_dyn::<u16>(arr, to_type, options),
530
UInt32 => utf8view_to_primitive_dyn::<u32>(arr, to_type, options),
531
UInt64 => utf8view_to_primitive_dyn::<u64>(arr, to_type, options),
532
Int8 => utf8view_to_primitive_dyn::<i8>(arr, to_type, options),
533
Int16 => utf8view_to_primitive_dyn::<i16>(arr, to_type, options),
534
Int32 => utf8view_to_primitive_dyn::<i32>(arr, to_type, options),
535
Int64 => utf8view_to_primitive_dyn::<i64>(arr, to_type, options),
536
#[cfg(feature = "dtype-i128")]
537
Int128 => utf8view_to_primitive_dyn::<i128>(arr, to_type, options),
538
Float32 => utf8view_to_primitive_dyn::<f32>(arr, to_type, options),
539
Float64 => utf8view_to_primitive_dyn::<f64>(arr, to_type, options),
540
Timestamp(time_unit, None) => {
541
utf8view_to_naive_timestamp_dyn(array, time_unit.to_owned())
542
},
543
Timestamp(time_unit, Some(time_zone)) => utf8view_to_timestamp(
544
array.as_any().downcast_ref().unwrap(),
545
RFC3339,
546
time_zone.clone(),
547
time_unit.to_owned(),
548
)
549
.map(|arr| arr.boxed()),
550
Date32 => utf8view_to_date32_dyn(array),
551
#[cfg(feature = "dtype-decimal")]
552
Decimal(precision, scale) => {
553
Ok(binview_to_decimal(&arr.to_binview(), Some(*precision), *scale).to_boxed())
554
},
555
_ => polars_bail!(InvalidOperation:
556
"casting from {from_type:?} to {to_type:?} not supported",
557
),
558
}
559
},
560
561
(_, Boolean) => match from_type {
562
UInt8 => primitive_to_boolean_dyn::<u8>(array, to_type.clone()),
563
UInt16 => primitive_to_boolean_dyn::<u16>(array, to_type.clone()),
564
UInt32 => primitive_to_boolean_dyn::<u32>(array, to_type.clone()),
565
UInt64 => primitive_to_boolean_dyn::<u64>(array, to_type.clone()),
566
Int8 => primitive_to_boolean_dyn::<i8>(array, to_type.clone()),
567
Int16 => primitive_to_boolean_dyn::<i16>(array, to_type.clone()),
568
Int32 => primitive_to_boolean_dyn::<i32>(array, to_type.clone()),
569
Int64 => primitive_to_boolean_dyn::<i64>(array, to_type.clone()),
570
#[cfg(feature = "dtype-i128")]
571
Int128 => primitive_to_boolean_dyn::<i128>(array, to_type.clone()),
572
Float32 => primitive_to_boolean_dyn::<f32>(array, to_type.clone()),
573
Float64 => primitive_to_boolean_dyn::<f64>(array, to_type.clone()),
574
Decimal(_, _) => primitive_to_boolean_dyn::<i128>(array, to_type.clone()),
575
_ => polars_bail!(InvalidOperation:
576
"casting from {from_type:?} to {to_type:?} not supported",
577
),
578
},
579
(Boolean, _) => match to_type {
580
UInt8 => boolean_to_primitive_dyn::<u8>(array),
581
UInt16 => boolean_to_primitive_dyn::<u16>(array),
582
UInt32 => boolean_to_primitive_dyn::<u32>(array),
583
UInt64 => boolean_to_primitive_dyn::<u64>(array),
584
Int8 => boolean_to_primitive_dyn::<i8>(array),
585
Int16 => boolean_to_primitive_dyn::<i16>(array),
586
Int32 => boolean_to_primitive_dyn::<i32>(array),
587
Int64 => boolean_to_primitive_dyn::<i64>(array),
588
#[cfg(feature = "dtype-i128")]
589
Int128 => boolean_to_primitive_dyn::<i128>(array),
590
Float32 => boolean_to_primitive_dyn::<f32>(array),
591
Float64 => boolean_to_primitive_dyn::<f64>(array),
592
Utf8View => boolean_to_utf8view_dyn(array),
593
BinaryView => boolean_to_binaryview_dyn(array),
594
_ => polars_bail!(InvalidOperation:
595
"casting from {from_type:?} to {to_type:?} not supported",
596
),
597
},
598
(_, BinaryView) => from_to_binview(array, from_type, to_type).map(|arr| arr.boxed()),
599
(_, Utf8View) => match from_type {
600
LargeUtf8 => Ok(utf8_to_utf8view(
601
array.as_any().downcast_ref::<Utf8Array<i64>>().unwrap(),
602
)
603
.boxed()),
604
Utf8 => Ok(
605
utf8_to_utf8view(array.as_any().downcast_ref::<Utf8Array<i32>>().unwrap()).boxed(),
606
),
607
#[cfg(feature = "dtype-decimal")]
608
Decimal(_, _) => Ok(decimal_to_utf8view_dyn(array).boxed()),
609
_ => from_to_binview(array, from_type, to_type)
610
.map(|arr| unsafe { arr.to_utf8view_unchecked() }.boxed()),
611
},
612
(Utf8, _) => match to_type {
613
LargeUtf8 => Ok(Box::new(utf8_to_large_utf8(
614
array.as_any().downcast_ref().unwrap(),
615
))),
616
_ => polars_bail!(InvalidOperation:
617
"casting from {from_type:?} to {to_type:?} not supported",
618
),
619
},
620
(LargeUtf8, _) => match to_type {
621
LargeBinary => Ok(utf8_to_binary::<i64>(
622
array.as_any().downcast_ref().unwrap(),
623
to_type.clone(),
624
)
625
.boxed()),
626
_ => polars_bail!(InvalidOperation:
627
"casting from {from_type:?} to {to_type:?} not supported",
628
),
629
},
630
(_, LargeUtf8) => match from_type {
631
UInt8 => primitive_to_utf8_dyn::<u8, i64>(array),
632
LargeBinary => {
633
binary_to_utf8::<i64>(array.as_any().downcast_ref().unwrap(), to_type.clone())
634
.map(|x| x.boxed())
635
},
636
_ => polars_bail!(InvalidOperation:
637
"casting from {from_type:?} to {to_type:?} not supported",
638
),
639
},
640
641
(Binary, _) => match to_type {
642
LargeBinary => Ok(Box::new(binary_to_large_binary(
643
array.as_any().downcast_ref().unwrap(),
644
to_type.clone(),
645
))),
646
_ => polars_bail!(InvalidOperation:
647
"casting from {from_type:?} to {to_type:?} not supported",
648
),
649
},
650
651
(LargeBinary, _) => match to_type {
652
UInt8 => binary_to_primitive_dyn::<i64, u8>(array, to_type, options),
653
UInt16 => binary_to_primitive_dyn::<i64, u16>(array, to_type, options),
654
UInt32 => binary_to_primitive_dyn::<i64, u32>(array, to_type, options),
655
UInt64 => binary_to_primitive_dyn::<i64, u64>(array, to_type, options),
656
Int8 => binary_to_primitive_dyn::<i64, i8>(array, to_type, options),
657
Int16 => binary_to_primitive_dyn::<i64, i16>(array, to_type, options),
658
Int32 => binary_to_primitive_dyn::<i64, i32>(array, to_type, options),
659
Int64 => binary_to_primitive_dyn::<i64, i64>(array, to_type, options),
660
#[cfg(feature = "dtype-i128")]
661
Int128 => binary_to_primitive_dyn::<i64, i128>(array, to_type, options),
662
Float32 => binary_to_primitive_dyn::<i64, f32>(array, to_type, options),
663
Float64 => binary_to_primitive_dyn::<i64, f64>(array, to_type, options),
664
Binary => {
665
binary_large_to_binary(array.as_any().downcast_ref().unwrap(), to_type.clone())
666
.map(|x| x.boxed())
667
},
668
LargeUtf8 => {
669
binary_to_utf8::<i64>(array.as_any().downcast_ref().unwrap(), to_type.clone())
670
.map(|x| x.boxed())
671
},
672
_ => polars_bail!(InvalidOperation:
673
"casting from {from_type:?} to {to_type:?} not supported",
674
),
675
},
676
(FixedSizeBinary(_), _) => match to_type {
677
Binary => Ok(fixed_size_binary_binary::<i32>(
678
array.as_any().downcast_ref().unwrap(),
679
to_type.clone(),
680
)
681
.boxed()),
682
LargeBinary => Ok(fixed_size_binary_binary::<i64>(
683
array.as_any().downcast_ref().unwrap(),
684
to_type.clone(),
685
)
686
.boxed()),
687
_ => polars_bail!(InvalidOperation:
688
"casting from {from_type:?} to {to_type:?} not supported",
689
),
690
},
691
// start numeric casts
692
(UInt8, UInt16) => primitive_to_primitive_dyn::<u8, u16>(array, to_type, as_options),
693
(UInt8, UInt32) => primitive_to_primitive_dyn::<u8, u32>(array, to_type, as_options),
694
(UInt8, UInt64) => primitive_to_primitive_dyn::<u8, u64>(array, to_type, as_options),
695
(UInt8, Int8) => primitive_to_primitive_dyn::<u8, i8>(array, to_type, options),
696
(UInt8, Int16) => primitive_to_primitive_dyn::<u8, i16>(array, to_type, options),
697
(UInt8, Int32) => primitive_to_primitive_dyn::<u8, i32>(array, to_type, options),
698
(UInt8, Int64) => primitive_to_primitive_dyn::<u8, i64>(array, to_type, options),
699
#[cfg(feature = "dtype-i128")]
700
(UInt8, Int128) => primitive_to_primitive_dyn::<u8, i128>(array, to_type, options),
701
(UInt8, Float32) => primitive_to_primitive_dyn::<u8, f32>(array, to_type, as_options),
702
(UInt8, Float64) => primitive_to_primitive_dyn::<u8, f64>(array, to_type, as_options),
703
(UInt8, Decimal(p, s)) => integer_to_decimal_dyn::<u8>(array, *p, *s),
704
705
(UInt16, UInt8) => primitive_to_primitive_dyn::<u16, u8>(array, to_type, options),
706
(UInt16, UInt32) => primitive_to_primitive_dyn::<u16, u32>(array, to_type, as_options),
707
(UInt16, UInt64) => primitive_to_primitive_dyn::<u16, u64>(array, to_type, as_options),
708
(UInt16, Int8) => primitive_to_primitive_dyn::<u16, i8>(array, to_type, options),
709
(UInt16, Int16) => primitive_to_primitive_dyn::<u16, i16>(array, to_type, options),
710
(UInt16, Int32) => primitive_to_primitive_dyn::<u16, i32>(array, to_type, options),
711
(UInt16, Int64) => primitive_to_primitive_dyn::<u16, i64>(array, to_type, options),
712
#[cfg(feature = "dtype-i128")]
713
(UInt16, Int128) => primitive_to_primitive_dyn::<u16, i128>(array, to_type, options),
714
(UInt16, Float32) => primitive_to_primitive_dyn::<u16, f32>(array, to_type, as_options),
715
(UInt16, Float64) => primitive_to_primitive_dyn::<u16, f64>(array, to_type, as_options),
716
(UInt16, Decimal(p, s)) => integer_to_decimal_dyn::<u16>(array, *p, *s),
717
718
(UInt32, UInt8) => primitive_to_primitive_dyn::<u32, u8>(array, to_type, options),
719
(UInt32, UInt16) => primitive_to_primitive_dyn::<u32, u16>(array, to_type, options),
720
(UInt32, UInt64) => primitive_to_primitive_dyn::<u32, u64>(array, to_type, as_options),
721
(UInt32, Int8) => primitive_to_primitive_dyn::<u32, i8>(array, to_type, options),
722
(UInt32, Int16) => primitive_to_primitive_dyn::<u32, i16>(array, to_type, options),
723
(UInt32, Int32) => primitive_to_primitive_dyn::<u32, i32>(array, to_type, options),
724
(UInt32, Int64) => primitive_to_primitive_dyn::<u32, i64>(array, to_type, options),
725
#[cfg(feature = "dtype-i128")]
726
(UInt32, Int128) => primitive_to_primitive_dyn::<u32, i128>(array, to_type, options),
727
(UInt32, Float32) => primitive_to_primitive_dyn::<u32, f32>(array, to_type, as_options),
728
(UInt32, Float64) => primitive_to_primitive_dyn::<u32, f64>(array, to_type, as_options),
729
(UInt32, Decimal(p, s)) => integer_to_decimal_dyn::<u32>(array, *p, *s),
730
731
(UInt64, UInt8) => primitive_to_primitive_dyn::<u64, u8>(array, to_type, options),
732
(UInt64, UInt16) => primitive_to_primitive_dyn::<u64, u16>(array, to_type, options),
733
(UInt64, UInt32) => primitive_to_primitive_dyn::<u64, u32>(array, to_type, options),
734
(UInt64, Int8) => primitive_to_primitive_dyn::<u64, i8>(array, to_type, options),
735
(UInt64, Int16) => primitive_to_primitive_dyn::<u64, i16>(array, to_type, options),
736
(UInt64, Int32) => primitive_to_primitive_dyn::<u64, i32>(array, to_type, options),
737
(UInt64, Int64) => primitive_to_primitive_dyn::<u64, i64>(array, to_type, options),
738
#[cfg(feature = "dtype-i128")]
739
(UInt64, Int128) => primitive_to_primitive_dyn::<u64, i128>(array, to_type, options),
740
(UInt64, Float32) => primitive_to_primitive_dyn::<u64, f32>(array, to_type, as_options),
741
(UInt64, Float64) => primitive_to_primitive_dyn::<u64, f64>(array, to_type, as_options),
742
(UInt64, Decimal(p, s)) => integer_to_decimal_dyn::<u64>(array, *p, *s),
743
744
(Int8, UInt8) => primitive_to_primitive_dyn::<i8, u8>(array, to_type, options),
745
(Int8, UInt16) => primitive_to_primitive_dyn::<i8, u16>(array, to_type, options),
746
(Int8, UInt32) => primitive_to_primitive_dyn::<i8, u32>(array, to_type, options),
747
(Int8, UInt64) => primitive_to_primitive_dyn::<i8, u64>(array, to_type, options),
748
(Int8, Int16) => primitive_to_primitive_dyn::<i8, i16>(array, to_type, as_options),
749
(Int8, Int32) => primitive_to_primitive_dyn::<i8, i32>(array, to_type, as_options),
750
(Int8, Int64) => primitive_to_primitive_dyn::<i8, i64>(array, to_type, as_options),
751
#[cfg(feature = "dtype-i128")]
752
(Int8, Int128) => primitive_to_primitive_dyn::<i8, i128>(array, to_type, as_options),
753
(Int8, Float32) => primitive_to_primitive_dyn::<i8, f32>(array, to_type, as_options),
754
(Int8, Float64) => primitive_to_primitive_dyn::<i8, f64>(array, to_type, as_options),
755
(Int8, Decimal(p, s)) => integer_to_decimal_dyn::<i8>(array, *p, *s),
756
757
(Int16, UInt8) => primitive_to_primitive_dyn::<i16, u8>(array, to_type, options),
758
(Int16, UInt16) => primitive_to_primitive_dyn::<i16, u16>(array, to_type, options),
759
(Int16, UInt32) => primitive_to_primitive_dyn::<i16, u32>(array, to_type, options),
760
(Int16, UInt64) => primitive_to_primitive_dyn::<i16, u64>(array, to_type, options),
761
(Int16, Int8) => primitive_to_primitive_dyn::<i16, i8>(array, to_type, options),
762
(Int16, Int32) => primitive_to_primitive_dyn::<i16, i32>(array, to_type, as_options),
763
(Int16, Int64) => primitive_to_primitive_dyn::<i16, i64>(array, to_type, as_options),
764
#[cfg(feature = "dtype-i128")]
765
(Int16, Int128) => primitive_to_primitive_dyn::<i16, i128>(array, to_type, as_options),
766
(Int16, Float32) => primitive_to_primitive_dyn::<i16, f32>(array, to_type, as_options),
767
(Int16, Float64) => primitive_to_primitive_dyn::<i16, f64>(array, to_type, as_options),
768
(Int16, Decimal(p, s)) => integer_to_decimal_dyn::<i16>(array, *p, *s),
769
770
(Int32, UInt8) => primitive_to_primitive_dyn::<i32, u8>(array, to_type, options),
771
(Int32, UInt16) => primitive_to_primitive_dyn::<i32, u16>(array, to_type, options),
772
(Int32, UInt32) => primitive_to_primitive_dyn::<i32, u32>(array, to_type, options),
773
(Int32, UInt64) => primitive_to_primitive_dyn::<i32, u64>(array, to_type, options),
774
(Int32, Int8) => primitive_to_primitive_dyn::<i32, i8>(array, to_type, options),
775
(Int32, Int16) => primitive_to_primitive_dyn::<i32, i16>(array, to_type, options),
776
(Int32, Int64) => primitive_to_primitive_dyn::<i32, i64>(array, to_type, as_options),
777
#[cfg(feature = "dtype-i128")]
778
(Int32, Int128) => primitive_to_primitive_dyn::<i32, i128>(array, to_type, as_options),
779
(Int32, Float32) => primitive_to_primitive_dyn::<i32, f32>(array, to_type, as_options),
780
(Int32, Float64) => primitive_to_primitive_dyn::<i32, f64>(array, to_type, as_options),
781
(Int32, Decimal(p, s)) => integer_to_decimal_dyn::<i32>(array, *p, *s),
782
783
(Int64, UInt8) => primitive_to_primitive_dyn::<i64, u8>(array, to_type, options),
784
(Int64, UInt16) => primitive_to_primitive_dyn::<i64, u16>(array, to_type, options),
785
(Int64, UInt32) => primitive_to_primitive_dyn::<i64, u32>(array, to_type, options),
786
(Int64, UInt64) => primitive_to_primitive_dyn::<i64, u64>(array, to_type, options),
787
(Int64, Int8) => primitive_to_primitive_dyn::<i64, i8>(array, to_type, options),
788
(Int64, Int16) => primitive_to_primitive_dyn::<i64, i16>(array, to_type, options),
789
(Int64, Int32) => primitive_to_primitive_dyn::<i64, i32>(array, to_type, options),
790
#[cfg(feature = "dtype-i128")]
791
(Int64, Int128) => primitive_to_primitive_dyn::<i64, i128>(array, to_type, options),
792
(Int64, Float32) => primitive_to_primitive_dyn::<i64, f32>(array, to_type, options),
793
(Int64, Float64) => primitive_to_primitive_dyn::<i64, f64>(array, to_type, as_options),
794
(Int64, Decimal(p, s)) => integer_to_decimal_dyn::<i64>(array, *p, *s),
795
796
#[cfg(feature = "dtype-i128")]
797
(Int128, UInt8) => primitive_to_primitive_dyn::<i128, u8>(array, to_type, options),
798
#[cfg(feature = "dtype-i128")]
799
(Int128, UInt16) => primitive_to_primitive_dyn::<i128, u16>(array, to_type, options),
800
#[cfg(feature = "dtype-i128")]
801
(Int128, UInt32) => primitive_to_primitive_dyn::<i128, u32>(array, to_type, options),
802
#[cfg(feature = "dtype-i128")]
803
(Int128, UInt64) => primitive_to_primitive_dyn::<i128, u64>(array, to_type, options),
804
#[cfg(feature = "dtype-i128")]
805
(Int128, Int8) => primitive_to_primitive_dyn::<i128, i8>(array, to_type, options),
806
#[cfg(feature = "dtype-i128")]
807
(Int128, Int16) => primitive_to_primitive_dyn::<i128, i16>(array, to_type, options),
808
#[cfg(feature = "dtype-i128")]
809
(Int128, Int32) => primitive_to_primitive_dyn::<i128, i32>(array, to_type, options),
810
#[cfg(feature = "dtype-i128")]
811
(Int128, Int64) => primitive_to_primitive_dyn::<i128, i64>(array, to_type, options),
812
#[cfg(feature = "dtype-i128")]
813
(Int128, Float32) => primitive_to_primitive_dyn::<i128, f32>(array, to_type, options),
814
#[cfg(feature = "dtype-i128")]
815
(Int128, Float64) => primitive_to_primitive_dyn::<i128, f64>(array, to_type, as_options),
816
#[cfg(feature = "dtype-i128")]
817
(Int128, Decimal(p, s)) => integer_to_decimal_dyn::<i128>(array, *p, *s),
818
819
(Float16, Float32) => {
820
let from = array.as_any().downcast_ref().unwrap();
821
Ok(f16_to_f32(from).boxed())
822
},
823
824
(Float32, UInt8) => primitive_to_primitive_dyn::<f32, u8>(array, to_type, options),
825
(Float32, UInt16) => primitive_to_primitive_dyn::<f32, u16>(array, to_type, options),
826
(Float32, UInt32) => primitive_to_primitive_dyn::<f32, u32>(array, to_type, options),
827
(Float32, UInt64) => primitive_to_primitive_dyn::<f32, u64>(array, to_type, options),
828
(Float32, Int8) => primitive_to_primitive_dyn::<f32, i8>(array, to_type, options),
829
(Float32, Int16) => primitive_to_primitive_dyn::<f32, i16>(array, to_type, options),
830
(Float32, Int32) => primitive_to_primitive_dyn::<f32, i32>(array, to_type, options),
831
(Float32, Int64) => primitive_to_primitive_dyn::<f32, i64>(array, to_type, options),
832
(Float32, Int128) => primitive_to_primitive_dyn::<f32, i128>(array, to_type, options),
833
(Float32, Float64) => primitive_to_primitive_dyn::<f32, f64>(array, to_type, as_options),
834
(Float32, Decimal(p, s)) => float_to_decimal_dyn::<f32>(array, *p, *s),
835
836
(Float64, UInt8) => primitive_to_primitive_dyn::<f64, u8>(array, to_type, options),
837
(Float64, UInt16) => primitive_to_primitive_dyn::<f64, u16>(array, to_type, options),
838
(Float64, UInt32) => primitive_to_primitive_dyn::<f64, u32>(array, to_type, options),
839
(Float64, UInt64) => primitive_to_primitive_dyn::<f64, u64>(array, to_type, options),
840
(Float64, Int8) => primitive_to_primitive_dyn::<f64, i8>(array, to_type, options),
841
(Float64, Int16) => primitive_to_primitive_dyn::<f64, i16>(array, to_type, options),
842
(Float64, Int32) => primitive_to_primitive_dyn::<f64, i32>(array, to_type, options),
843
(Float64, Int64) => primitive_to_primitive_dyn::<f64, i64>(array, to_type, options),
844
(Float64, Int128) => primitive_to_primitive_dyn::<f64, i128>(array, to_type, options),
845
(Float64, Float32) => primitive_to_primitive_dyn::<f64, f32>(array, to_type, options),
846
(Float64, Decimal(p, s)) => float_to_decimal_dyn::<f64>(array, *p, *s),
847
848
(Decimal(_, _), UInt8) => decimal_to_integer_dyn::<u8>(array),
849
(Decimal(_, _), UInt16) => decimal_to_integer_dyn::<u16>(array),
850
(Decimal(_, _), UInt32) => decimal_to_integer_dyn::<u32>(array),
851
(Decimal(_, _), UInt64) => decimal_to_integer_dyn::<u64>(array),
852
(Decimal(_, _), Int8) => decimal_to_integer_dyn::<i8>(array),
853
(Decimal(_, _), Int16) => decimal_to_integer_dyn::<i16>(array),
854
(Decimal(_, _), Int32) => decimal_to_integer_dyn::<i32>(array),
855
(Decimal(_, _), Int64) => decimal_to_integer_dyn::<i64>(array),
856
(Decimal(_, _), Int128) => decimal_to_integer_dyn::<i128>(array),
857
(Decimal(_, _), Float32) => decimal_to_float_dyn::<f32>(array),
858
(Decimal(_, _), Float64) => decimal_to_float_dyn::<f64>(array),
859
(Decimal(_, _), Decimal(to_p, to_s)) => decimal_to_decimal_dyn(array, *to_p, *to_s),
860
// end numeric casts
861
862
// temporal casts
863
(Int32, Date32) => primitive_to_same_primitive_dyn::<i32>(array, to_type),
864
(Int32, Time32(TimeUnit::Second)) => primitive_dyn!(array, int32_to_time32s),
865
(Int32, Time32(TimeUnit::Millisecond)) => primitive_dyn!(array, int32_to_time32ms),
866
// No support for microsecond/nanosecond with i32
867
(Date32, Int32) => primitive_to_same_primitive_dyn::<i32>(array, to_type),
868
(Date32, Int64) => primitive_to_primitive_dyn::<i32, i64>(array, to_type, options),
869
(Time32(_), Int32) => primitive_to_same_primitive_dyn::<i32>(array, to_type),
870
(Int64, Date64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
871
// No support for second/milliseconds with i64
872
(Int64, Time64(TimeUnit::Microsecond)) => primitive_dyn!(array, int64_to_time64us),
873
(Int64, Time64(TimeUnit::Nanosecond)) => primitive_dyn!(array, int64_to_time64ns),
874
875
(Date64, Int32) => primitive_to_primitive_dyn::<i64, i32>(array, to_type, options),
876
(Date64, Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
877
(Time64(_), Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
878
(Date32, Date64) => primitive_dyn!(array, date32_to_date64),
879
(Date64, Date32) => primitive_dyn!(array, date64_to_date32),
880
(Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => {
881
primitive_dyn!(array, time32s_to_time32ms)
882
},
883
(Time32(TimeUnit::Millisecond), Time32(TimeUnit::Second)) => {
884
primitive_dyn!(array, time32ms_to_time32s)
885
},
886
(Time32(from_unit), Time64(to_unit)) => {
887
primitive_dyn!(array, time32_to_time64, *from_unit, *to_unit)
888
},
889
(Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond)) => {
890
primitive_dyn!(array, time64us_to_time64ns)
891
},
892
(Time64(TimeUnit::Nanosecond), Time64(TimeUnit::Microsecond)) => {
893
primitive_dyn!(array, time64ns_to_time64us)
894
},
895
(Time64(from_unit), Time32(to_unit)) => {
896
primitive_dyn!(array, time64_to_time32, *from_unit, *to_unit)
897
},
898
(Timestamp(_, _), Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
899
(Int64, Timestamp(_, _)) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
900
(Timestamp(from_unit, _), Timestamp(to_unit, tz)) => {
901
primitive_dyn!(array, timestamp_to_timestamp, *from_unit, *to_unit, tz)
902
},
903
(Timestamp(from_unit, _), Date32) => primitive_dyn!(array, timestamp_to_date32, *from_unit),
904
(Timestamp(from_unit, _), Date64) => primitive_dyn!(array, timestamp_to_date64, *from_unit),
905
906
(Int64, Duration(_)) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
907
(Duration(_), Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
908
909
// Not supported by Polars.
910
// (Interval(IntervalUnit::DayTime), Interval(IntervalUnit::MonthDayNano)) => {
911
// primitive_dyn!(array, days_ms_to_months_days_ns)
912
// },
913
// (Interval(IntervalUnit::YearMonth), Interval(IntervalUnit::MonthDayNano)) => {
914
// primitive_dyn!(array, months_to_months_days_ns)
915
// },
916
_ => polars_bail!(InvalidOperation:
917
"casting from {from_type:?} to {to_type:?} not supported",
918
),
919
}
920
}
921
922
/// Attempts to encode an array into an `ArrayDictionary` with index
923
/// type K and value (dictionary) type value_type
924
///
925
/// K is the key type
926
fn cast_to_dictionary<K: DictionaryKey>(
927
array: &dyn Array,
928
dict_value_type: &ArrowDataType,
929
options: CastOptionsImpl,
930
) -> PolarsResult<Box<dyn Array>> {
931
let array = cast(array, dict_value_type, options)?;
932
let array = array.as_ref();
933
match *dict_value_type {
934
ArrowDataType::Int8 => primitive_to_dictionary_dyn::<i8, K>(array),
935
ArrowDataType::Int16 => primitive_to_dictionary_dyn::<i16, K>(array),
936
ArrowDataType::Int32 => primitive_to_dictionary_dyn::<i32, K>(array),
937
ArrowDataType::Int64 => primitive_to_dictionary_dyn::<i64, K>(array),
938
ArrowDataType::UInt8 => primitive_to_dictionary_dyn::<u8, K>(array),
939
ArrowDataType::UInt16 => primitive_to_dictionary_dyn::<u16, K>(array),
940
ArrowDataType::UInt32 => primitive_to_dictionary_dyn::<u32, K>(array),
941
ArrowDataType::UInt64 => primitive_to_dictionary_dyn::<u64, K>(array),
942
ArrowDataType::BinaryView => {
943
binview_to_dictionary::<K>(array.as_any().downcast_ref().unwrap())
944
.map(|arr| arr.boxed())
945
},
946
ArrowDataType::Utf8View => {
947
utf8view_to_dictionary::<K>(array.as_any().downcast_ref().unwrap())
948
.map(|arr| arr.boxed())
949
},
950
ArrowDataType::LargeUtf8 => utf8_to_dictionary_dyn::<i64, K>(array),
951
ArrowDataType::LargeBinary => binary_to_dictionary_dyn::<i64, K>(array),
952
ArrowDataType::Time64(_) => primitive_to_dictionary_dyn::<i64, K>(array),
953
ArrowDataType::Timestamp(_, _) => primitive_to_dictionary_dyn::<i64, K>(array),
954
ArrowDataType::Date32 => primitive_to_dictionary_dyn::<i32, K>(array),
955
_ => polars_bail!(ComputeError:
956
"unsupported output type for dictionary packing: {dict_value_type:?}"
957
),
958
}
959
}
960
961
fn from_to_binview(
962
array: &dyn Array,
963
from_type: &ArrowDataType,
964
to_type: &ArrowDataType,
965
) -> PolarsResult<BinaryViewArray> {
966
use ArrowDataType::*;
967
let binview = match from_type {
968
UInt8 => primitive_to_binview_dyn::<u8>(array),
969
UInt16 => primitive_to_binview_dyn::<u16>(array),
970
UInt32 => primitive_to_binview_dyn::<u32>(array),
971
UInt64 => primitive_to_binview_dyn::<u64>(array),
972
Int8 => primitive_to_binview_dyn::<i8>(array),
973
Int16 => primitive_to_binview_dyn::<i16>(array),
974
Int32 => primitive_to_binview_dyn::<i32>(array),
975
Int64 => primitive_to_binview_dyn::<i64>(array),
976
Int128 => primitive_to_binview_dyn::<i128>(array),
977
Float32 => primitive_to_binview_dyn::<f32>(array),
978
Float64 => primitive_to_binview_dyn::<f64>(array),
979
Binary => binary_to_binview::<i32>(array.as_any().downcast_ref().unwrap()),
980
FixedSizeBinary(_) => fixed_size_binary_to_binview(array.as_any().downcast_ref().unwrap()),
981
LargeBinary => binary_to_binview::<i64>(array.as_any().downcast_ref().unwrap()),
982
_ => polars_bail!(InvalidOperation:
983
"casting from {from_type:?} to {to_type:?} not supported",
984
),
985
};
986
Ok(binview)
987
}
988
989
#[cfg(test)]
mod tests {
    use arrow::offset::OffsetsBuffer;
    use polars_error::PolarsError;

    use super::*;

    /// Data type of a list whose child field is a nullable `UInt8` with an
    /// empty name.
    fn list_of_u8_dtype() -> ArrowDataType {
        let item = Field::new("".into(), ArrowDataType::UInt8, true);
        ArrowDataType::List(Box::new(item))
    }

    /// Runs [`cast`] to `BinaryView` with the default cast options.
    fn cast_to_binview(list: &dyn Array) -> PolarsResult<Box<dyn Array>> {
        cast(list, &ArrowDataType::BinaryView, CastOptionsImpl::default())
    }

    /// Copies every value of `views` into an owned byte vector.
    fn collect_values(views: &BinaryViewArray) -> Vec<Vec<u8>> {
        views.values_iter().map(|bytes| bytes.to_vec()).collect()
    }

    /// Under `cfg(test)` the data buffers produced by the list-of-`u8` to
    /// `BinaryView` cast are capped at a small size (15 bytes, judging by the
    /// error message asserted in
    /// `cast_list_uint8_to_binary_errors_too_large_list`), so the 20 input
    /// bytes have to be spread over more than one buffer.
    #[test]
    fn cast_list_uint8_to_binary_across_buffer_max_size() {
        let values = PrimitiveArray::from_slice((0u8..20).collect::<Vec<_>>()).boxed();
        let list_u8 = ListArray::try_new(
            list_of_u8_dtype(),
            // Offsets start at 0 and never decrease, describing three lists of
            // lengths 13, 5 and 2.
            unsafe { OffsetsBuffer::new_unchecked(vec![0, 13, 18, 20].into()) },
            values,
            None,
        )
        .unwrap();

        let binary = cast_to_binview(&list_u8).unwrap();
        let binary_array = binary
            .as_ref()
            .as_any()
            .downcast_ref::<BinaryViewArray>()
            .unwrap();

        // The values themselves survive the cast unchanged.
        let expected_values: Vec<Vec<u8>> = vec![
            vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
            vec![13, 14, 15, 16, 17],
            vec![18, 19],
        ];
        assert_eq!(collect_values(binary_array), expected_values);

        // The second list does not fit behind the first one within the cap,
        // so it starts a new buffer which it then shares with the third list.
        let buffer_lens = binary_array
            .data_buffers()
            .iter()
            .map(|buf| buf.len())
            .collect::<Vec<_>>();
        assert_eq!(buffer_lens, vec![13, 7]);
    }

    /// The Arrow spec requires a view to fit in a single buffer. Under
    /// `cfg(test)` the buffers generated by the cast hold at most 15 bytes, so
    /// a single list of 16 bytes must be rejected with an error.
    #[test]
    fn cast_list_uint8_to_binary_errors_too_large_list() {
        let values = PrimitiveArray::from_slice(vec![0u8; 16]);
        let list_u8 = ListArray::new(
            list_of_u8_dtype(),
            OffsetsBuffer::one_with_length(16),
            values.boxed(),
            None,
        );

        let err = cast_to_binview(&list_u8).unwrap_err();
        let is_expected_error = matches!(
            err,
            PolarsError::InvalidOperation(msg)
                if msg.as_ref() == "when casting to BinaryView, list lengths must be <= 15"
        );
        assert!(is_expected_error);
    }

    /// When every view is at most 12 bytes long, all views are inline and the
    /// cast result carries no data buffers at all.
    #[test]
    fn cast_list_uint8_to_binary_drops_small_buffers() {
        let values = PrimitiveArray::from_slice(vec![10u8; 12]);
        let list_u8 = ListArray::new(
            list_of_u8_dtype(),
            OffsetsBuffer::one_with_length(12),
            values.boxed(),
            None,
        );

        let binary = cast_to_binview(&list_u8).unwrap();
        let binary_array = binary
            .as_ref()
            .as_any()
            .downcast_ref::<BinaryViewArray>()
            .unwrap();

        assert!(binary_array.data_buffers().is_empty());
        assert_eq!(
            collect_values(binary_array),
            vec![vec![10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10]]
        );
    }
}
1099
1100